i386.c (ix86_option_override_internal): Allow -mabi for 32-bit, too.
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "diagnostic-core.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
50 #include "cgraph.h"
51 #include "gimple.h"
52 #include "dwarf2.h"
53 #include "df.h"
54 #include "tm-constrs.h"
55 #include "params.h"
56 #include "cselib.h"
57 #include "debug.h"
58 #include "sched-int.h"
59 #include "sbitmap.h"
60 #include "fibheap.h"
61 #include "opts.h"
62 #include "diagnostic.h"
63
64 enum upper_128bits_state
65 {
66 unknown = 0,
67 unused,
68 used
69 };
70
71 typedef struct block_info_def
72 {
73 /* State of the upper 128bits of AVX registers at exit. */
74 enum upper_128bits_state state;
75 /* TRUE if state of the upper 128bits of AVX registers is unchanged
76 in this block. */
77 bool unchanged;
78 /* TRUE if block has been processed. */
79 bool processed;
80 /* TRUE if block has been scanned. */
81 bool scanned;
82 /* Previous state of the upper 128bits of AVX registers at entry. */
83 enum upper_128bits_state prev;
84 } *block_info;
85
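/* Per-basic-block data for the vzeroupper pass is kept in the basic
   block's AUX field (allocated by alloc_aux_for_blocks in
   move_or_delete_vzeroupper below) and accessed through this macro.  */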
86 #define BLOCK_INFO(B) ((block_info) (B)->aux)
87
88 enum call_avx256_state
89 {
90 /* Callee returns 256bit AVX register. */
91 callee_return_avx256 = -1,
92 /* Callee returns and passes 256bit AVX register. */
93 callee_return_pass_avx256,
94 /* Callee passes 256bit AVX register. */
95 callee_pass_avx256,
 96   /* Callee neither returns nor is passed a 256bit AVX register, or no
 97      256bit AVX register is involved in the function return.  */
98 call_no_avx256,
99 /* vzeroupper intrinsic. */
100 vzeroupper_intrinsic
101 };
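
/* One of the values above is carried as the first operand of each
   UNSPECV_VZEROUPPER pattern (presumably recorded by the call and return
   expanders elsewhere in this file) and is read back as
   INTVAL (XVECEXP (pat, 0, 0)) in move_or_delete_vzeroupper_2 below.  */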
102
 103 /* Check if a 256bit AVX register is referenced as a store destination or source.  */
104
105 static void
106 check_avx256_stores (rtx dest, const_rtx set, void *data)
107 {
108 if ((REG_P (dest)
109 && VALID_AVX256_REG_MODE (GET_MODE (dest)))
110 || (GET_CODE (set) == SET
111 && REG_P (SET_SRC (set))
112 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
113 {
114 enum upper_128bits_state *state
115 = (enum upper_128bits_state *) data;
116 *state = used;
117 }
118 }
119
120 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
121 in basic block BB. Delete it if upper 128bit AVX registers are
122 unused. If it isn't deleted, move it to just before a jump insn.
123
124 STATE is state of the upper 128bits of AVX registers at entry. */
125
126 static void
127 move_or_delete_vzeroupper_2 (basic_block bb,
128 enum upper_128bits_state state)
129 {
130 rtx insn, bb_end;
131 rtx vzeroupper_insn = NULL_RTX;
132 rtx pat;
133 int avx256;
134 bool unchanged;
135
136 if (BLOCK_INFO (bb)->unchanged)
137 {
138 if (dump_file)
139 fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
140 bb->index, state);
141
142 BLOCK_INFO (bb)->state = state;
143 return;
144 }
145
146 if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
147 {
148 if (dump_file)
149 fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
150 bb->index, BLOCK_INFO (bb)->state);
151 return;
152 }
153
154 BLOCK_INFO (bb)->prev = state;
155
156 if (dump_file)
157 fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
158 bb->index, state);
159
160 unchanged = true;
161
 162       /* BB_END changes when the insn it points to is deleted.  */
163 bb_end = BB_END (bb);
164 insn = BB_HEAD (bb);
165 while (insn != bb_end)
166 {
167 insn = NEXT_INSN (insn);
168
169 if (!NONDEBUG_INSN_P (insn))
170 continue;
171
172 /* Move vzeroupper before jump/call. */
173 if (JUMP_P (insn) || CALL_P (insn))
174 {
175 if (!vzeroupper_insn)
176 continue;
177
178 if (PREV_INSN (insn) != vzeroupper_insn)
179 {
180 if (dump_file)
181 {
182 fprintf (dump_file, "Move vzeroupper after:\n");
183 print_rtl_single (dump_file, PREV_INSN (insn));
184 fprintf (dump_file, "before:\n");
185 print_rtl_single (dump_file, insn);
186 }
187 reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
188 PREV_INSN (insn));
189 }
190 vzeroupper_insn = NULL_RTX;
191 continue;
192 }
193
194 pat = PATTERN (insn);
195
196 /* Check insn for vzeroupper intrinsic. */
197 if (GET_CODE (pat) == UNSPEC_VOLATILE
198 && XINT (pat, 1) == UNSPECV_VZEROUPPER)
199 {
200 if (dump_file)
201 {
202 /* Found vzeroupper intrinsic. */
203 fprintf (dump_file, "Found vzeroupper:\n");
204 print_rtl_single (dump_file, insn);
205 }
206 }
207 else
208 {
209 /* Check insn for vzeroall intrinsic. */
210 if (GET_CODE (pat) == PARALLEL
211 && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
212 && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
213 {
214 state = unused;
215 unchanged = false;
216
217 /* Delete pending vzeroupper insertion. */
218 if (vzeroupper_insn)
219 {
220 delete_insn (vzeroupper_insn);
221 vzeroupper_insn = NULL_RTX;
222 }
223 }
224 else if (state != used)
225 {
226 note_stores (pat, check_avx256_stores, &state);
227 if (state == used)
228 unchanged = false;
229 }
230 continue;
231 }
232
233 /* Process vzeroupper intrinsic. */
234 avx256 = INTVAL (XVECEXP (pat, 0, 0));
235
236 if (state == unused)
237 {
 238 	  /* Since the upper 128bits are cleared, the callee must not be
 239 	     passed a 256bit AVX register.  We only need to check whether
 240 	     the callee returns a 256bit AVX register.  */
241 if (avx256 == callee_return_avx256)
242 {
243 state = used;
244 unchanged = false;
245 }
246
247 /* Remove unnecessary vzeroupper since upper 128bits are
248 cleared. */
249 if (dump_file)
250 {
251 fprintf (dump_file, "Delete redundant vzeroupper:\n");
252 print_rtl_single (dump_file, insn);
253 }
254 delete_insn (insn);
255 }
256 else
257 {
258 /* Set state to UNUSED if callee doesn't return 256bit AVX
259 register. */
260 if (avx256 != callee_return_pass_avx256)
261 state = unused;
262
263 if (avx256 == callee_return_pass_avx256
264 || avx256 == callee_pass_avx256)
265 {
 266 	      /* Must remove vzeroupper since the callee is passed a
 267 	         256bit AVX register.  */
268 if (dump_file)
269 {
270 fprintf (dump_file, "Delete callee pass vzeroupper:\n");
271 print_rtl_single (dump_file, insn);
272 }
273 delete_insn (insn);
274 }
275 else
276 {
277 vzeroupper_insn = insn;
278 unchanged = false;
279 }
280 }
281 }
282
283 BLOCK_INFO (bb)->state = state;
284 BLOCK_INFO (bb)->unchanged = unchanged;
285 BLOCK_INFO (bb)->scanned = true;
286
287 if (dump_file)
288 fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
289 bb->index, unchanged ? "unchanged" : "changed",
290 state);
291 }
292
293 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
294 in BLOCK and check its predecessor blocks. Treat UNKNOWN state
 295    as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
296 state is changed. */
297
298 static bool
299 move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
300 {
301 edge e;
302 edge_iterator ei;
303 enum upper_128bits_state state, old_state, new_state;
304 bool seen_unknown;
305
306 if (dump_file)
307 fprintf (dump_file, " Process [bb %i]: status: %d\n",
308 block->index, BLOCK_INFO (block)->processed);
309
310 if (BLOCK_INFO (block)->processed)
311 return false;
312
313 state = unused;
314
315 /* Check all predecessor edges of this block. */
316 seen_unknown = false;
317 FOR_EACH_EDGE (e, ei, block->preds)
318 {
319 if (e->src == block)
320 continue;
321 switch (BLOCK_INFO (e->src)->state)
322 {
323 case unknown:
324 if (!unknown_is_unused)
325 seen_unknown = true;
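 	  /* FALLTHRU */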
326 case unused:
327 break;
328 case used:
329 state = used;
330 goto done;
331 }
332 }
333
334 if (seen_unknown)
335 state = unknown;
336
337 done:
338 old_state = BLOCK_INFO (block)->state;
339 move_or_delete_vzeroupper_2 (block, state);
340 new_state = BLOCK_INFO (block)->state;
341
342 if (state != unknown || new_state == used)
343 BLOCK_INFO (block)->processed = true;
344
345 /* Need to rescan if the upper 128bits of AVX registers are changed
346 to USED at exit. */
347 if (new_state != old_state)
348 {
349 if (new_state == used)
350 cfun->machine->rescan_vzeroupper_p = 1;
351 return true;
352 }
353 else
354 return false;
355 }
356
357 /* Go through the instruction stream looking for vzeroupper. Delete
358 it if upper 128bit AVX registers are unused. If it isn't deleted,
359 move it to just before a jump insn. */
360
361 static void
362 move_or_delete_vzeroupper (void)
363 {
364 edge e;
365 edge_iterator ei;
366 basic_block bb;
367 fibheap_t worklist, pending, fibheap_swap;
368 sbitmap visited, in_worklist, in_pending, sbitmap_swap;
369 int *bb_order;
370 int *rc_order;
371 int i;
372
373 /* Set up block info for each basic block. */
374 alloc_aux_for_blocks (sizeof (struct block_info_def));
375
376 /* Process outgoing edges of entry point. */
377 if (dump_file)
378 fprintf (dump_file, "Process outgoing edges of entry point\n");
379
380 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
381 {
382 move_or_delete_vzeroupper_2 (e->dest,
383 cfun->machine->caller_pass_avx256_p
384 ? used : unused);
385 BLOCK_INFO (e->dest)->processed = true;
386 }
387
388 /* Compute reverse completion order of depth first search of the CFG
389 so that the data-flow runs faster. */
390 rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
391 bb_order = XNEWVEC (int, last_basic_block);
392 pre_and_rev_post_order_compute (NULL, rc_order, false);
393 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
394 bb_order[rc_order[i]] = i;
395 free (rc_order);
396
397 worklist = fibheap_new ();
398 pending = fibheap_new ();
399 visited = sbitmap_alloc (last_basic_block);
400 in_worklist = sbitmap_alloc (last_basic_block);
401 in_pending = sbitmap_alloc (last_basic_block);
402 sbitmap_zero (in_worklist);
403
404 /* Don't check outgoing edges of entry point. */
405 sbitmap_ones (in_pending);
406 FOR_EACH_BB (bb)
407 if (BLOCK_INFO (bb)->processed)
408 RESET_BIT (in_pending, bb->index);
409 else
410 {
411 move_or_delete_vzeroupper_1 (bb, false);
412 fibheap_insert (pending, bb_order[bb->index], bb);
413 }
414
415 if (dump_file)
416 fprintf (dump_file, "Check remaining basic blocks\n");
417
418 while (!fibheap_empty (pending))
419 {
420 fibheap_swap = pending;
421 pending = worklist;
422 worklist = fibheap_swap;
423 sbitmap_swap = in_pending;
424 in_pending = in_worklist;
425 in_worklist = sbitmap_swap;
426
427 sbitmap_zero (visited);
428
429 cfun->machine->rescan_vzeroupper_p = 0;
430
431 while (!fibheap_empty (worklist))
432 {
433 bb = (basic_block) fibheap_extract_min (worklist);
434 RESET_BIT (in_worklist, bb->index);
435 gcc_assert (!TEST_BIT (visited, bb->index));
436 if (!TEST_BIT (visited, bb->index))
437 {
438 edge_iterator ei;
439
440 SET_BIT (visited, bb->index);
441
442 if (move_or_delete_vzeroupper_1 (bb, false))
443 FOR_EACH_EDGE (e, ei, bb->succs)
444 {
445 if (e->dest == EXIT_BLOCK_PTR
446 || BLOCK_INFO (e->dest)->processed)
447 continue;
448
449 if (TEST_BIT (visited, e->dest->index))
450 {
451 if (!TEST_BIT (in_pending, e->dest->index))
452 {
453 /* Send E->DEST to next round. */
454 SET_BIT (in_pending, e->dest->index);
455 fibheap_insert (pending,
456 bb_order[e->dest->index],
457 e->dest);
458 }
459 }
460 else if (!TEST_BIT (in_worklist, e->dest->index))
461 {
462 /* Add E->DEST to current round. */
463 SET_BIT (in_worklist, e->dest->index);
464 fibheap_insert (worklist, bb_order[e->dest->index],
465 e->dest);
466 }
467 }
468 }
469 }
470
471 if (!cfun->machine->rescan_vzeroupper_p)
472 break;
473 }
474
475 free (bb_order);
476 fibheap_delete (worklist);
477 fibheap_delete (pending);
478 sbitmap_free (visited);
479 sbitmap_free (in_worklist);
480 sbitmap_free (in_pending);
481
482 if (dump_file)
483 fprintf (dump_file, "Process remaining basic blocks\n");
484
485 FOR_EACH_BB (bb)
486 move_or_delete_vzeroupper_1 (bb, true);
487
488 free_aux_for_blocks ();
489 }
490
491 static rtx legitimize_dllimport_symbol (rtx, bool);
492
493 #ifndef CHECK_STACK_LIMIT
494 #define CHECK_STACK_LIMIT (-1)
495 #endif
496
497 /* Return index of given mode in mult and division cost tables. */
498 #define MODE_INDEX(mode) \
499 ((mode) == QImode ? 0 \
500 : (mode) == HImode ? 1 \
501 : (mode) == SImode ? 2 \
502 : (mode) == DImode ? 3 \
503 : 4)
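
/* For example, MODE_INDEX (SImode) is 2 and selects the SImode entry of the
   per-mode multiply and divide cost arrays in the processor_costs tables
   below, while 4 is the catch-all "other" entry.  */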
504
505 /* Processor costs (relative to an add) */
506 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
507 #define COSTS_N_BYTES(N) ((N) * 2)
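
/* Given the assumption above that COSTS_N_INSNS (N) is (N) * 4,
   COSTS_N_BYTES (2) == COSTS_N_INSNS (1), i.e. the 2-byte add is the unit
   cost, so the entries of ix86_size_cost below read as approximate
   instruction sizes in bytes rather than latencies.  */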
508
509 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
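
/* Each processor_costs table below ends with two pairs of stringop_algs
   descriptors, apparently one pair for block moves (memcpy) and one for
   block clears (memset), with the second descriptor of each pair used for
   64-bit code; CPUs without 64-bit support therefore use
   DUMMY_STRINGOP_ALGS there.  Within a descriptor the leading algorithm is
   used when the block size is unknown at compile time, and each {MAX, ALG}
   pair selects ALG for known sizes up to MAX, -1 meaning no upper bound.  */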
510
511 const
512 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
513 COSTS_N_BYTES (2), /* cost of an add instruction */
514 COSTS_N_BYTES (3), /* cost of a lea instruction */
515 COSTS_N_BYTES (2), /* variable shift costs */
516 COSTS_N_BYTES (3), /* constant shift costs */
517 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
518 COSTS_N_BYTES (3), /* HI */
519 COSTS_N_BYTES (3), /* SI */
520 COSTS_N_BYTES (3), /* DI */
521 COSTS_N_BYTES (5)}, /* other */
522 0, /* cost of multiply per each bit set */
523 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
524 COSTS_N_BYTES (3), /* HI */
525 COSTS_N_BYTES (3), /* SI */
526 COSTS_N_BYTES (3), /* DI */
527 COSTS_N_BYTES (5)}, /* other */
528 COSTS_N_BYTES (3), /* cost of movsx */
529 COSTS_N_BYTES (3), /* cost of movzx */
530 0, /* "large" insn */
531 2, /* MOVE_RATIO */
532 2, /* cost for loading QImode using movzbl */
533 {2, 2, 2}, /* cost of loading integer registers
534 in QImode, HImode and SImode.
535 Relative to reg-reg move (2). */
536 {2, 2, 2}, /* cost of storing integer registers */
537 2, /* cost of reg,reg fld/fst */
538 {2, 2, 2}, /* cost of loading fp registers
539 in SFmode, DFmode and XFmode */
540 {2, 2, 2}, /* cost of storing fp registers
541 in SFmode, DFmode and XFmode */
542 3, /* cost of moving MMX register */
543 {3, 3}, /* cost of loading MMX registers
544 in SImode and DImode */
545 {3, 3}, /* cost of storing MMX registers
546 in SImode and DImode */
547 3, /* cost of moving SSE register */
548 {3, 3, 3}, /* cost of loading SSE registers
549 in SImode, DImode and TImode */
550 {3, 3, 3}, /* cost of storing SSE registers
551 in SImode, DImode and TImode */
552 3, /* MMX or SSE register to integer */
553 0, /* size of l1 cache */
554 0, /* size of l2 cache */
555 0, /* size of prefetch block */
556 0, /* number of parallel prefetches */
557 2, /* Branch cost */
558 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
559 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
560 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
561 COSTS_N_BYTES (2), /* cost of FABS instruction. */
562 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
563 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
564 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
565 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
566 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
567 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
568 1, /* scalar_stmt_cost. */
569 1, /* scalar load_cost. */
570 1, /* scalar_store_cost. */
571 1, /* vec_stmt_cost. */
572 1, /* vec_to_scalar_cost. */
573 1, /* scalar_to_vec_cost. */
574 1, /* vec_align_load_cost. */
575 1, /* vec_unalign_load_cost. */
576 1, /* vec_store_cost. */
577 1, /* cond_taken_branch_cost. */
578 1, /* cond_not_taken_branch_cost. */
579 };
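
/* The per-CPU tables that follow have the same layout; the one in effect is
   chosen from the -mtune setting handled later in this file, while
   ix86_size_cost above is used when optimizing for size.  */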
580
581 /* Processor costs (relative to an add) */
582 static const
583 struct processor_costs i386_cost = { /* 386 specific costs */
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (1), /* cost of a lea instruction */
586 COSTS_N_INSNS (3), /* variable shift costs */
587 COSTS_N_INSNS (2), /* constant shift costs */
588 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (6), /* HI */
590 COSTS_N_INSNS (6), /* SI */
591 COSTS_N_INSNS (6), /* DI */
592 COSTS_N_INSNS (6)}, /* other */
593 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (23), /* HI */
596 COSTS_N_INSNS (23), /* SI */
597 COSTS_N_INSNS (23), /* DI */
598 COSTS_N_INSNS (23)}, /* other */
599 COSTS_N_INSNS (3), /* cost of movsx */
600 COSTS_N_INSNS (2), /* cost of movzx */
601 15, /* "large" insn */
602 3, /* MOVE_RATIO */
603 4, /* cost for loading QImode using movzbl */
604 {2, 4, 2}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {2, 4, 2}, /* cost of storing integer registers */
608 2, /* cost of reg,reg fld/fst */
609 {8, 8, 8}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {8, 8, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 8}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 8}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 8, 16}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 8, 16}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 3, /* MMX or SSE register to integer */
624 0, /* size of l1 cache */
625 0, /* size of l2 cache */
626 0, /* size of prefetch block */
627 0, /* number of parallel prefetches */
628 1, /* Branch cost */
629 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (22), /* cost of FABS instruction. */
633 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
635 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
636 DUMMY_STRINGOP_ALGS},
637 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
638 DUMMY_STRINGOP_ALGS},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
650 };
651
652 static const
653 struct processor_costs i486_cost = { /* 486 specific costs */
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (1), /* cost of a lea instruction */
656 COSTS_N_INSNS (3), /* variable shift costs */
657 COSTS_N_INSNS (2), /* constant shift costs */
658 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (12), /* HI */
660 COSTS_N_INSNS (12), /* SI */
661 COSTS_N_INSNS (12), /* DI */
662 COSTS_N_INSNS (12)}, /* other */
663 1, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (40), /* HI */
666 COSTS_N_INSNS (40), /* SI */
667 COSTS_N_INSNS (40), /* DI */
668 COSTS_N_INSNS (40)}, /* other */
669 COSTS_N_INSNS (3), /* cost of movsx */
670 COSTS_N_INSNS (2), /* cost of movzx */
671 15, /* "large" insn */
672 3, /* MOVE_RATIO */
673 4, /* cost for loading QImode using movzbl */
674 {2, 4, 2}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {2, 4, 2}, /* cost of storing integer registers */
678 2, /* cost of reg,reg fld/fst */
679 {8, 8, 8}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {8, 8, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {4, 8}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 8}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 8, 16}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 8, 16}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 3, /* MMX or SSE register to integer */
694 4, /* size of l1 cache. 486 has 8kB cache
695 shared for code and data, so 4kB is
696 not really precise. */
697 4, /* size of l2 cache */
698 0, /* size of prefetch block */
699 0, /* number of parallel prefetches */
700 1, /* Branch cost */
701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (3), /* cost of FABS instruction. */
705 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
707 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
708 DUMMY_STRINGOP_ALGS},
709 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
710 DUMMY_STRINGOP_ALGS},
711 1, /* scalar_stmt_cost. */
712 1, /* scalar load_cost. */
713 1, /* scalar_store_cost. */
714 1, /* vec_stmt_cost. */
715 1, /* vec_to_scalar_cost. */
716 1, /* scalar_to_vec_cost. */
717 1, /* vec_align_load_cost. */
718 2, /* vec_unalign_load_cost. */
719 1, /* vec_store_cost. */
720 3, /* cond_taken_branch_cost. */
721 1, /* cond_not_taken_branch_cost. */
722 };
723
724 static const
725 struct processor_costs pentium_cost = {
726 COSTS_N_INSNS (1), /* cost of an add instruction */
727 COSTS_N_INSNS (1), /* cost of a lea instruction */
728 COSTS_N_INSNS (4), /* variable shift costs */
729 COSTS_N_INSNS (1), /* constant shift costs */
730 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
731 COSTS_N_INSNS (11), /* HI */
732 COSTS_N_INSNS (11), /* SI */
733 COSTS_N_INSNS (11), /* DI */
734 COSTS_N_INSNS (11)}, /* other */
735 0, /* cost of multiply per each bit set */
736 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
737 COSTS_N_INSNS (25), /* HI */
738 COSTS_N_INSNS (25), /* SI */
739 COSTS_N_INSNS (25), /* DI */
740 COSTS_N_INSNS (25)}, /* other */
741 COSTS_N_INSNS (3), /* cost of movsx */
742 COSTS_N_INSNS (2), /* cost of movzx */
743 8, /* "large" insn */
744 6, /* MOVE_RATIO */
745 6, /* cost for loading QImode using movzbl */
746 {2, 4, 2}, /* cost of loading integer registers
747 in QImode, HImode and SImode.
748 Relative to reg-reg move (2). */
749 {2, 4, 2}, /* cost of storing integer registers */
750 2, /* cost of reg,reg fld/fst */
751 {2, 2, 6}, /* cost of loading fp registers
752 in SFmode, DFmode and XFmode */
753 {4, 4, 6}, /* cost of storing fp registers
754 in SFmode, DFmode and XFmode */
755 8, /* cost of moving MMX register */
756 {8, 8}, /* cost of loading MMX registers
757 in SImode and DImode */
758 {8, 8}, /* cost of storing MMX registers
759 in SImode and DImode */
760 2, /* cost of moving SSE register */
761 {4, 8, 16}, /* cost of loading SSE registers
762 in SImode, DImode and TImode */
763 {4, 8, 16}, /* cost of storing SSE registers
764 in SImode, DImode and TImode */
765 3, /* MMX or SSE register to integer */
766 8, /* size of l1 cache. */
767 8, /* size of l2 cache */
768 0, /* size of prefetch block */
769 0, /* number of parallel prefetches */
770 2, /* Branch cost */
771 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
772 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
773 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
774 COSTS_N_INSNS (1), /* cost of FABS instruction. */
775 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
776 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
777 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
778 DUMMY_STRINGOP_ALGS},
779 {{libcall, {{-1, rep_prefix_4_byte}}},
780 DUMMY_STRINGOP_ALGS},
781 1, /* scalar_stmt_cost. */
782 1, /* scalar load_cost. */
783 1, /* scalar_store_cost. */
784 1, /* vec_stmt_cost. */
785 1, /* vec_to_scalar_cost. */
786 1, /* scalar_to_vec_cost. */
787 1, /* vec_align_load_cost. */
788 2, /* vec_unalign_load_cost. */
789 1, /* vec_store_cost. */
790 3, /* cond_taken_branch_cost. */
791 1, /* cond_not_taken_branch_cost. */
792 };
793
794 static const
795 struct processor_costs pentiumpro_cost = {
796 COSTS_N_INSNS (1), /* cost of an add instruction */
797 COSTS_N_INSNS (1), /* cost of a lea instruction */
798 COSTS_N_INSNS (1), /* variable shift costs */
799 COSTS_N_INSNS (1), /* constant shift costs */
800 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
801 COSTS_N_INSNS (4), /* HI */
802 COSTS_N_INSNS (4), /* SI */
803 COSTS_N_INSNS (4), /* DI */
804 COSTS_N_INSNS (4)}, /* other */
805 0, /* cost of multiply per each bit set */
806 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
807 COSTS_N_INSNS (17), /* HI */
808 COSTS_N_INSNS (17), /* SI */
809 COSTS_N_INSNS (17), /* DI */
810 COSTS_N_INSNS (17)}, /* other */
811 COSTS_N_INSNS (1), /* cost of movsx */
812 COSTS_N_INSNS (1), /* cost of movzx */
813 8, /* "large" insn */
814 6, /* MOVE_RATIO */
815 2, /* cost for loading QImode using movzbl */
816 {4, 4, 4}, /* cost of loading integer registers
817 in QImode, HImode and SImode.
818 Relative to reg-reg move (2). */
819 {2, 2, 2}, /* cost of storing integer registers */
820 2, /* cost of reg,reg fld/fst */
821 {2, 2, 6}, /* cost of loading fp registers
822 in SFmode, DFmode and XFmode */
823 {4, 4, 6}, /* cost of storing fp registers
824 in SFmode, DFmode and XFmode */
825 2, /* cost of moving MMX register */
826 {2, 2}, /* cost of loading MMX registers
827 in SImode and DImode */
828 {2, 2}, /* cost of storing MMX registers
829 in SImode and DImode */
830 2, /* cost of moving SSE register */
831 {2, 2, 8}, /* cost of loading SSE registers
832 in SImode, DImode and TImode */
833 {2, 2, 8}, /* cost of storing SSE registers
834 in SImode, DImode and TImode */
835 3, /* MMX or SSE register to integer */
836 8, /* size of l1 cache. */
837 256, /* size of l2 cache */
838 32, /* size of prefetch block */
839 6, /* number of parallel prefetches */
840 2, /* Branch cost */
841 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
842 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
843 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
844 COSTS_N_INSNS (2), /* cost of FABS instruction. */
845 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
846 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
 847   /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
 848      (we ensure the alignment).  For small blocks an inline loop is still a
 849      noticeable win; for bigger blocks either rep movsl or rep movsb is the
 850      way to go.  Rep movsb apparently has a more expensive startup time in
 851      the CPU, but after 4K the difference is down in the noise.  */
852 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
853 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
854 DUMMY_STRINGOP_ALGS},
855 {{rep_prefix_4_byte, {{1024, unrolled_loop},
856 {8192, rep_prefix_4_byte}, {-1, libcall}}},
857 DUMMY_STRINGOP_ALGS},
858 1, /* scalar_stmt_cost. */
859 1, /* scalar load_cost. */
860 1, /* scalar_store_cost. */
861 1, /* vec_stmt_cost. */
862 1, /* vec_to_scalar_cost. */
863 1, /* scalar_to_vec_cost. */
864 1, /* vec_align_load_cost. */
865 2, /* vec_unalign_load_cost. */
866 1, /* vec_store_cost. */
867 3, /* cond_taken_branch_cost. */
868 1, /* cond_not_taken_branch_cost. */
869 };
870
871 static const
872 struct processor_costs geode_cost = {
873 COSTS_N_INSNS (1), /* cost of an add instruction */
874 COSTS_N_INSNS (1), /* cost of a lea instruction */
875 COSTS_N_INSNS (2), /* variable shift costs */
876 COSTS_N_INSNS (1), /* constant shift costs */
877 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
878 COSTS_N_INSNS (4), /* HI */
879 COSTS_N_INSNS (7), /* SI */
880 COSTS_N_INSNS (7), /* DI */
881 COSTS_N_INSNS (7)}, /* other */
882 0, /* cost of multiply per each bit set */
883 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
884 COSTS_N_INSNS (23), /* HI */
885 COSTS_N_INSNS (39), /* SI */
886 COSTS_N_INSNS (39), /* DI */
887 COSTS_N_INSNS (39)}, /* other */
888 COSTS_N_INSNS (1), /* cost of movsx */
889 COSTS_N_INSNS (1), /* cost of movzx */
890 8, /* "large" insn */
891 4, /* MOVE_RATIO */
892 1, /* cost for loading QImode using movzbl */
893 {1, 1, 1}, /* cost of loading integer registers
894 in QImode, HImode and SImode.
895 Relative to reg-reg move (2). */
896 {1, 1, 1}, /* cost of storing integer registers */
897 1, /* cost of reg,reg fld/fst */
898 {1, 1, 1}, /* cost of loading fp registers
899 in SFmode, DFmode and XFmode */
900 {4, 6, 6}, /* cost of storing fp registers
901 in SFmode, DFmode and XFmode */
902
903 1, /* cost of moving MMX register */
904 {1, 1}, /* cost of loading MMX registers
905 in SImode and DImode */
906 {1, 1}, /* cost of storing MMX registers
907 in SImode and DImode */
908 1, /* cost of moving SSE register */
909 {1, 1, 1}, /* cost of loading SSE registers
910 in SImode, DImode and TImode */
911 {1, 1, 1}, /* cost of storing SSE registers
912 in SImode, DImode and TImode */
913 1, /* MMX or SSE register to integer */
914 64, /* size of l1 cache. */
915 128, /* size of l2 cache. */
916 32, /* size of prefetch block */
917 1, /* number of parallel prefetches */
918 1, /* Branch cost */
919 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
920 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
921 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
922 COSTS_N_INSNS (1), /* cost of FABS instruction. */
923 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
924 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
925 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
926 DUMMY_STRINGOP_ALGS},
927 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
928 DUMMY_STRINGOP_ALGS},
929 1, /* scalar_stmt_cost. */
930 1, /* scalar load_cost. */
931 1, /* scalar_store_cost. */
932 1, /* vec_stmt_cost. */
933 1, /* vec_to_scalar_cost. */
934 1, /* scalar_to_vec_cost. */
935 1, /* vec_align_load_cost. */
936 2, /* vec_unalign_load_cost. */
937 1, /* vec_store_cost. */
938 3, /* cond_taken_branch_cost. */
939 1, /* cond_not_taken_branch_cost. */
940 };
941
942 static const
943 struct processor_costs k6_cost = {
944 COSTS_N_INSNS (1), /* cost of an add instruction */
945 COSTS_N_INSNS (2), /* cost of a lea instruction */
946 COSTS_N_INSNS (1), /* variable shift costs */
947 COSTS_N_INSNS (1), /* constant shift costs */
948 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
949 COSTS_N_INSNS (3), /* HI */
950 COSTS_N_INSNS (3), /* SI */
951 COSTS_N_INSNS (3), /* DI */
952 COSTS_N_INSNS (3)}, /* other */
953 0, /* cost of multiply per each bit set */
954 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
955 COSTS_N_INSNS (18), /* HI */
956 COSTS_N_INSNS (18), /* SI */
957 COSTS_N_INSNS (18), /* DI */
958 COSTS_N_INSNS (18)}, /* other */
959 COSTS_N_INSNS (2), /* cost of movsx */
960 COSTS_N_INSNS (2), /* cost of movzx */
961 8, /* "large" insn */
962 4, /* MOVE_RATIO */
963 3, /* cost for loading QImode using movzbl */
964 {4, 5, 4}, /* cost of loading integer registers
965 in QImode, HImode and SImode.
966 Relative to reg-reg move (2). */
967 {2, 3, 2}, /* cost of storing integer registers */
968 4, /* cost of reg,reg fld/fst */
969 {6, 6, 6}, /* cost of loading fp registers
970 in SFmode, DFmode and XFmode */
971 {4, 4, 4}, /* cost of storing fp registers
972 in SFmode, DFmode and XFmode */
973 2, /* cost of moving MMX register */
974 {2, 2}, /* cost of loading MMX registers
975 in SImode and DImode */
976 {2, 2}, /* cost of storing MMX registers
977 in SImode and DImode */
978 2, /* cost of moving SSE register */
979 {2, 2, 8}, /* cost of loading SSE registers
980 in SImode, DImode and TImode */
981 {2, 2, 8}, /* cost of storing SSE registers
982 in SImode, DImode and TImode */
983 6, /* MMX or SSE register to integer */
984 32, /* size of l1 cache. */
985 32, /* size of l2 cache. Some models
986 have integrated l2 cache, but
987 optimizing for k6 is not important
988 enough to worry about that. */
989 32, /* size of prefetch block */
990 1, /* number of parallel prefetches */
991 1, /* Branch cost */
992 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
993 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
994 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
995 COSTS_N_INSNS (2), /* cost of FABS instruction. */
996 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
997 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
998 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
999 DUMMY_STRINGOP_ALGS},
1000 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1001 DUMMY_STRINGOP_ALGS},
1002 1, /* scalar_stmt_cost. */
1003 1, /* scalar load_cost. */
1004 1, /* scalar_store_cost. */
1005 1, /* vec_stmt_cost. */
1006 1, /* vec_to_scalar_cost. */
1007 1, /* scalar_to_vec_cost. */
1008 1, /* vec_align_load_cost. */
1009 2, /* vec_unalign_load_cost. */
1010 1, /* vec_store_cost. */
1011 3, /* cond_taken_branch_cost. */
1012 1, /* cond_not_taken_branch_cost. */
1013 };
1014
1015 static const
1016 struct processor_costs athlon_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (2), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (5), /* HI */
1023 COSTS_N_INSNS (5), /* SI */
1024 COSTS_N_INSNS (5), /* DI */
1025 COSTS_N_INSNS (5)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (26), /* HI */
1029 COSTS_N_INSNS (42), /* SI */
1030 COSTS_N_INSNS (74), /* DI */
1031 COSTS_N_INSNS (74)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {3, 4, 3}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {3, 4, 3}, /* cost of storing integer registers */
1041 4, /* cost of reg,reg fld/fst */
1042 {4, 4, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {6, 6, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 6}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 5}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 5, /* MMX or SSE register to integer */
1057 64, /* size of l1 cache. */
1058 256, /* size of l2 cache. */
1059 64, /* size of prefetch block */
1060 6, /* number of parallel prefetches */
1061 5, /* Branch cost */
1062 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1063 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1064 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1065 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1066 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1067 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 1068   /* For some reason, Athlon deals better with the REP prefix (relative to
 1069      loops) than K8 does.  Alignment becomes important after 8 bytes for
 1070      memcpy and 128 bytes for memset.  */
1071 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1072 DUMMY_STRINGOP_ALGS},
1073 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1074 DUMMY_STRINGOP_ALGS},
1075 1, /* scalar_stmt_cost. */
1076 1, /* scalar load_cost. */
1077 1, /* scalar_store_cost. */
1078 1, /* vec_stmt_cost. */
1079 1, /* vec_to_scalar_cost. */
1080 1, /* scalar_to_vec_cost. */
1081 1, /* vec_align_load_cost. */
1082 2, /* vec_unalign_load_cost. */
1083 1, /* vec_store_cost. */
1084 3, /* cond_taken_branch_cost. */
1085 1, /* cond_not_taken_branch_cost. */
1086 };
1087
1088 static const
1089 struct processor_costs k8_cost = {
1090 COSTS_N_INSNS (1), /* cost of an add instruction */
1091 COSTS_N_INSNS (2), /* cost of a lea instruction */
1092 COSTS_N_INSNS (1), /* variable shift costs */
1093 COSTS_N_INSNS (1), /* constant shift costs */
1094 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1095 COSTS_N_INSNS (4), /* HI */
1096 COSTS_N_INSNS (3), /* SI */
1097 COSTS_N_INSNS (4), /* DI */
1098 COSTS_N_INSNS (5)}, /* other */
1099 0, /* cost of multiply per each bit set */
1100 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1101 COSTS_N_INSNS (26), /* HI */
1102 COSTS_N_INSNS (42), /* SI */
1103 COSTS_N_INSNS (74), /* DI */
1104 COSTS_N_INSNS (74)}, /* other */
1105 COSTS_N_INSNS (1), /* cost of movsx */
1106 COSTS_N_INSNS (1), /* cost of movzx */
1107 8, /* "large" insn */
1108 9, /* MOVE_RATIO */
1109 4, /* cost for loading QImode using movzbl */
1110 {3, 4, 3}, /* cost of loading integer registers
1111 in QImode, HImode and SImode.
1112 Relative to reg-reg move (2). */
1113 {3, 4, 3}, /* cost of storing integer registers */
1114 4, /* cost of reg,reg fld/fst */
1115 {4, 4, 12}, /* cost of loading fp registers
1116 in SFmode, DFmode and XFmode */
1117 {6, 6, 8}, /* cost of storing fp registers
1118 in SFmode, DFmode and XFmode */
1119 2, /* cost of moving MMX register */
1120 {3, 3}, /* cost of loading MMX registers
1121 in SImode and DImode */
1122 {4, 4}, /* cost of storing MMX registers
1123 in SImode and DImode */
1124 2, /* cost of moving SSE register */
1125 {4, 3, 6}, /* cost of loading SSE registers
1126 in SImode, DImode and TImode */
1127 {4, 4, 5}, /* cost of storing SSE registers
1128 in SImode, DImode and TImode */
1129 5, /* MMX or SSE register to integer */
1130 64, /* size of l1 cache. */
1131 512, /* size of l2 cache. */
1132 64, /* size of prefetch block */
 1133   /* New AMD processors never drop prefetches; if they cannot be performed
 1134      immediately, they are queued.  We set the number of simultaneous
 1135      prefetches to a large constant to reflect this (it is probably not a
 1136      good idea to leave the number of prefetches completely unlimited, as
 1137      their execution also takes some time).  */
1138 100, /* number of parallel prefetches */
1139 3, /* Branch cost */
1140 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1141 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1142 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1143 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1144 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1145 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 1146   /* K8 has an optimized REP instruction for medium sized blocks, but for
 1147      very small blocks it is better to use a loop.  For large blocks, a
 1148      libcall can do nontemporal accesses and beat inline code considerably.  */
1149 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1150 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1151 {{libcall, {{8, loop}, {24, unrolled_loop},
1152 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1153 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1154 4, /* scalar_stmt_cost. */
1155 2, /* scalar load_cost. */
1156 2, /* scalar_store_cost. */
1157 5, /* vec_stmt_cost. */
1158 0, /* vec_to_scalar_cost. */
1159 2, /* scalar_to_vec_cost. */
1160 2, /* vec_align_load_cost. */
1161 3, /* vec_unalign_load_cost. */
1162 3, /* vec_store_cost. */
1163 3, /* cond_taken_branch_cost. */
1164 2, /* cond_not_taken_branch_cost. */
1165 };
1166
1167 struct processor_costs amdfam10_cost = {
1168 COSTS_N_INSNS (1), /* cost of an add instruction */
1169 COSTS_N_INSNS (2), /* cost of a lea instruction */
1170 COSTS_N_INSNS (1), /* variable shift costs */
1171 COSTS_N_INSNS (1), /* constant shift costs */
1172 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1173 COSTS_N_INSNS (4), /* HI */
1174 COSTS_N_INSNS (3), /* SI */
1175 COSTS_N_INSNS (4), /* DI */
1176 COSTS_N_INSNS (5)}, /* other */
1177 0, /* cost of multiply per each bit set */
1178 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1179 COSTS_N_INSNS (35), /* HI */
1180 COSTS_N_INSNS (51), /* SI */
1181 COSTS_N_INSNS (83), /* DI */
1182 COSTS_N_INSNS (83)}, /* other */
1183 COSTS_N_INSNS (1), /* cost of movsx */
1184 COSTS_N_INSNS (1), /* cost of movzx */
1185 8, /* "large" insn */
1186 9, /* MOVE_RATIO */
1187 4, /* cost for loading QImode using movzbl */
1188 {3, 4, 3}, /* cost of loading integer registers
1189 in QImode, HImode and SImode.
1190 Relative to reg-reg move (2). */
1191 {3, 4, 3}, /* cost of storing integer registers */
1192 4, /* cost of reg,reg fld/fst */
1193 {4, 4, 12}, /* cost of loading fp registers
1194 in SFmode, DFmode and XFmode */
1195 {6, 6, 8}, /* cost of storing fp registers
1196 in SFmode, DFmode and XFmode */
1197 2, /* cost of moving MMX register */
1198 {3, 3}, /* cost of loading MMX registers
1199 in SImode and DImode */
1200 {4, 4}, /* cost of storing MMX registers
1201 in SImode and DImode */
1202 2, /* cost of moving SSE register */
1203 {4, 4, 3}, /* cost of loading SSE registers
1204 in SImode, DImode and TImode */
1205 {4, 4, 5}, /* cost of storing SSE registers
1206 in SImode, DImode and TImode */
1207 3, /* MMX or SSE register to integer */
1208 /* On K8:
1209 MOVD reg64, xmmreg Double FSTORE 4
1210 MOVD reg32, xmmreg Double FSTORE 4
1211 On AMDFAM10:
1212 MOVD reg64, xmmreg Double FADD 3
1213 1/1 1/1
1214 MOVD reg32, xmmreg Double FADD 3
1215 1/1 1/1 */
1216 64, /* size of l1 cache. */
1217 512, /* size of l2 cache. */
1218 64, /* size of prefetch block */
 1219   /* New AMD processors never drop prefetches; if they cannot be performed
 1220      immediately, they are queued.  We set the number of simultaneous
 1221      prefetches to a large constant to reflect this (it is probably not a
 1222      good idea to leave the number of prefetches completely unlimited, as
 1223      their execution also takes some time).  */
1224 100, /* number of parallel prefetches */
1225 2, /* Branch cost */
1226 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1227 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1228 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1229 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1230 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1231 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1232
 1233   /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but
 1234      for very small blocks it is better to use a loop.  For large blocks, a
 1235      libcall can do nontemporal accesses and beat inline code considerably.  */
1236 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1237 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1238 {{libcall, {{8, loop}, {24, unrolled_loop},
1239 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1240 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1241 4, /* scalar_stmt_cost. */
1242 2, /* scalar load_cost. */
1243 2, /* scalar_store_cost. */
1244 6, /* vec_stmt_cost. */
1245 0, /* vec_to_scalar_cost. */
1246 2, /* scalar_to_vec_cost. */
1247 2, /* vec_align_load_cost. */
1248 2, /* vec_unalign_load_cost. */
1249 2, /* vec_store_cost. */
1250 2, /* cond_taken_branch_cost. */
1251 1, /* cond_not_taken_branch_cost. */
1252 };
1253
1254 struct processor_costs bdver1_cost = {
1255 COSTS_N_INSNS (1), /* cost of an add instruction */
1256 COSTS_N_INSNS (1), /* cost of a lea instruction */
1257 COSTS_N_INSNS (1), /* variable shift costs */
1258 COSTS_N_INSNS (1), /* constant shift costs */
1259 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1260 COSTS_N_INSNS (4), /* HI */
1261 COSTS_N_INSNS (4), /* SI */
1262 COSTS_N_INSNS (6), /* DI */
1263 COSTS_N_INSNS (6)}, /* other */
1264 0, /* cost of multiply per each bit set */
1265 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1266 COSTS_N_INSNS (35), /* HI */
1267 COSTS_N_INSNS (51), /* SI */
1268 COSTS_N_INSNS (83), /* DI */
1269 COSTS_N_INSNS (83)}, /* other */
1270 COSTS_N_INSNS (1), /* cost of movsx */
1271 COSTS_N_INSNS (1), /* cost of movzx */
1272 8, /* "large" insn */
1273 9, /* MOVE_RATIO */
1274 4, /* cost for loading QImode using movzbl */
1275 {5, 5, 4}, /* cost of loading integer registers
1276 in QImode, HImode and SImode.
1277 Relative to reg-reg move (2). */
1278 {4, 4, 4}, /* cost of storing integer registers */
1279 2, /* cost of reg,reg fld/fst */
1280 {5, 5, 12}, /* cost of loading fp registers
1281 in SFmode, DFmode and XFmode */
1282 {4, 4, 8}, /* cost of storing fp registers
1283 in SFmode, DFmode and XFmode */
1284 2, /* cost of moving MMX register */
1285 {4, 4}, /* cost of loading MMX registers
1286 in SImode and DImode */
1287 {4, 4}, /* cost of storing MMX registers
1288 in SImode and DImode */
1289 2, /* cost of moving SSE register */
1290 {4, 4, 4}, /* cost of loading SSE registers
1291 in SImode, DImode and TImode */
1292 {4, 4, 4}, /* cost of storing SSE registers
1293 in SImode, DImode and TImode */
1294 2, /* MMX or SSE register to integer */
1295 /* On K8:
1296 MOVD reg64, xmmreg Double FSTORE 4
1297 MOVD reg32, xmmreg Double FSTORE 4
1298 On AMDFAM10:
1299 MOVD reg64, xmmreg Double FADD 3
1300 1/1 1/1
1301 MOVD reg32, xmmreg Double FADD 3
1302 1/1 1/1 */
1303 16, /* size of l1 cache. */
1304 2048, /* size of l2 cache. */
1305 64, /* size of prefetch block */
 1306   /* New AMD processors never drop prefetches; if they cannot be performed
 1307      immediately, they are queued.  We set the number of simultaneous
 1308      prefetches to a large constant to reflect this (it is probably not a
 1309      good idea to leave the number of prefetches completely unlimited, as
 1310      their execution also takes some time).  */
1311 100, /* number of parallel prefetches */
1312 2, /* Branch cost */
1313 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1314 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1315 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1316 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1317 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1318 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1319
 1320   /* BDVER1 has an optimized REP instruction for medium sized blocks, but
 1321      for very small blocks it is better to use a loop.  For large blocks, a
 1322      libcall can do nontemporal accesses and beat inline code considerably.  */
1323 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1324 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1325 {{libcall, {{8, loop}, {24, unrolled_loop},
1326 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1327 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1328 6, /* scalar_stmt_cost. */
1329 4, /* scalar load_cost. */
1330 4, /* scalar_store_cost. */
1331 6, /* vec_stmt_cost. */
1332 0, /* vec_to_scalar_cost. */
1333 2, /* scalar_to_vec_cost. */
1334 4, /* vec_align_load_cost. */
1335 4, /* vec_unalign_load_cost. */
1336 4, /* vec_store_cost. */
1337 2, /* cond_taken_branch_cost. */
1338 1, /* cond_not_taken_branch_cost. */
1339 };
1340
1341 struct processor_costs bdver2_cost = {
1342 COSTS_N_INSNS (1), /* cost of an add instruction */
1343 COSTS_N_INSNS (1), /* cost of a lea instruction */
1344 COSTS_N_INSNS (1), /* variable shift costs */
1345 COSTS_N_INSNS (1), /* constant shift costs */
1346 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1347 COSTS_N_INSNS (4), /* HI */
1348 COSTS_N_INSNS (4), /* SI */
1349 COSTS_N_INSNS (6), /* DI */
1350 COSTS_N_INSNS (6)}, /* other */
1351 0, /* cost of multiply per each bit set */
1352 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1353 COSTS_N_INSNS (35), /* HI */
1354 COSTS_N_INSNS (51), /* SI */
1355 COSTS_N_INSNS (83), /* DI */
1356 COSTS_N_INSNS (83)}, /* other */
1357 COSTS_N_INSNS (1), /* cost of movsx */
1358 COSTS_N_INSNS (1), /* cost of movzx */
1359 8, /* "large" insn */
1360 9, /* MOVE_RATIO */
1361 4, /* cost for loading QImode using movzbl */
1362 {5, 5, 4}, /* cost of loading integer registers
1363 in QImode, HImode and SImode.
1364 Relative to reg-reg move (2). */
1365 {4, 4, 4}, /* cost of storing integer registers */
1366 2, /* cost of reg,reg fld/fst */
1367 {5, 5, 12}, /* cost of loading fp registers
1368 in SFmode, DFmode and XFmode */
1369 {4, 4, 8}, /* cost of storing fp registers
1370 in SFmode, DFmode and XFmode */
1371 2, /* cost of moving MMX register */
1372 {4, 4}, /* cost of loading MMX registers
1373 in SImode and DImode */
1374 {4, 4}, /* cost of storing MMX registers
1375 in SImode and DImode */
1376 2, /* cost of moving SSE register */
1377 {4, 4, 4}, /* cost of loading SSE registers
1378 in SImode, DImode and TImode */
1379 {4, 4, 4}, /* cost of storing SSE registers
1380 in SImode, DImode and TImode */
1381 2, /* MMX or SSE register to integer */
1382 /* On K8:
1383 MOVD reg64, xmmreg Double FSTORE 4
1384 MOVD reg32, xmmreg Double FSTORE 4
1385 On AMDFAM10:
1386 MOVD reg64, xmmreg Double FADD 3
1387 1/1 1/1
1388 MOVD reg32, xmmreg Double FADD 3
1389 1/1 1/1 */
1390 16, /* size of l1 cache. */
1391 2048, /* size of l2 cache. */
1392 64, /* size of prefetch block */
 1393   /* New AMD processors never drop prefetches; if they cannot be performed
 1394      immediately, they are queued.  We set the number of simultaneous
 1395      prefetches to a large constant to reflect this (it is probably not a
 1396      good idea to leave the number of prefetches completely unlimited, as
 1397      their execution also takes some time).  */
1398 100, /* number of parallel prefetches */
1399 2, /* Branch cost */
1400 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1401 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1402 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1403 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1404 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1405 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1406
 1407   /* BDVER2 has an optimized REP instruction for medium sized blocks, but
 1408      for very small blocks it is better to use a loop.  For large blocks, a
 1409      libcall can do nontemporal accesses and beat inline code considerably.  */
1410 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1411 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1412 {{libcall, {{8, loop}, {24, unrolled_loop},
1413 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1414 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1415 6, /* scalar_stmt_cost. */
1416 4, /* scalar load_cost. */
1417 4, /* scalar_store_cost. */
1418 6, /* vec_stmt_cost. */
1419 0, /* vec_to_scalar_cost. */
1420 2, /* scalar_to_vec_cost. */
1421 4, /* vec_align_load_cost. */
1422 4, /* vec_unalign_load_cost. */
1423 4, /* vec_store_cost. */
1424 2, /* cond_taken_branch_cost. */
1425 1, /* cond_not_taken_branch_cost. */
1426 };
1427
1428 struct processor_costs btver1_cost = {
1429 COSTS_N_INSNS (1), /* cost of an add instruction */
1430 COSTS_N_INSNS (2), /* cost of a lea instruction */
1431 COSTS_N_INSNS (1), /* variable shift costs */
1432 COSTS_N_INSNS (1), /* constant shift costs */
1433 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1434 COSTS_N_INSNS (4), /* HI */
1435 COSTS_N_INSNS (3), /* SI */
1436 COSTS_N_INSNS (4), /* DI */
1437 COSTS_N_INSNS (5)}, /* other */
1438 0, /* cost of multiply per each bit set */
1439 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1440 COSTS_N_INSNS (35), /* HI */
1441 COSTS_N_INSNS (51), /* SI */
1442 COSTS_N_INSNS (83), /* DI */
1443 COSTS_N_INSNS (83)}, /* other */
1444 COSTS_N_INSNS (1), /* cost of movsx */
1445 COSTS_N_INSNS (1), /* cost of movzx */
1446 8, /* "large" insn */
1447 9, /* MOVE_RATIO */
1448 4, /* cost for loading QImode using movzbl */
1449 {3, 4, 3}, /* cost of loading integer registers
1450 in QImode, HImode and SImode.
1451 Relative to reg-reg move (2). */
1452 {3, 4, 3}, /* cost of storing integer registers */
1453 4, /* cost of reg,reg fld/fst */
1454 {4, 4, 12}, /* cost of loading fp registers
1455 in SFmode, DFmode and XFmode */
1456 {6, 6, 8}, /* cost of storing fp registers
1457 in SFmode, DFmode and XFmode */
1458 2, /* cost of moving MMX register */
1459 {3, 3}, /* cost of loading MMX registers
1460 in SImode and DImode */
1461 {4, 4}, /* cost of storing MMX registers
1462 in SImode and DImode */
1463 2, /* cost of moving SSE register */
1464 {4, 4, 3}, /* cost of loading SSE registers
1465 in SImode, DImode and TImode */
1466 {4, 4, 5}, /* cost of storing SSE registers
1467 in SImode, DImode and TImode */
1468 3, /* MMX or SSE register to integer */
1469 /* On K8:
1470 MOVD reg64, xmmreg Double FSTORE 4
1471 MOVD reg32, xmmreg Double FSTORE 4
1472 On AMDFAM10:
1473 MOVD reg64, xmmreg Double FADD 3
1474 1/1 1/1
1475 MOVD reg32, xmmreg Double FADD 3
1476 1/1 1/1 */
1477 32, /* size of l1 cache. */
1478 512, /* size of l2 cache. */
1479 64, /* size of prefetch block */
1480 100, /* number of parallel prefetches */
1481 2, /* Branch cost */
1482 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1483 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1484 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1485 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1486 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1487 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1488
 1489   /* BTVER1 has an optimized REP instruction for medium sized blocks, but
 1490      for very small blocks it is better to use a loop.  For large blocks, a
 1491      libcall can do nontemporal accesses and beat inline code considerably.  */
1492 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1493 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1494 {{libcall, {{8, loop}, {24, unrolled_loop},
1495 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1496 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1497 4, /* scalar_stmt_cost. */
1498 2, /* scalar load_cost. */
1499 2, /* scalar_store_cost. */
1500 6, /* vec_stmt_cost. */
1501 0, /* vec_to_scalar_cost. */
1502 2, /* scalar_to_vec_cost. */
1503 2, /* vec_align_load_cost. */
1504 2, /* vec_unalign_load_cost. */
1505 2, /* vec_store_cost. */
1506 2, /* cond_taken_branch_cost. */
1507 1, /* cond_not_taken_branch_cost. */
1508 };
1509
1510 static const
1511 struct processor_costs pentium4_cost = {
1512 COSTS_N_INSNS (1), /* cost of an add instruction */
1513 COSTS_N_INSNS (3), /* cost of a lea instruction */
1514 COSTS_N_INSNS (4), /* variable shift costs */
1515 COSTS_N_INSNS (4), /* constant shift costs */
1516 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1517 COSTS_N_INSNS (15), /* HI */
1518 COSTS_N_INSNS (15), /* SI */
1519 COSTS_N_INSNS (15), /* DI */
1520 COSTS_N_INSNS (15)}, /* other */
1521 0, /* cost of multiply per each bit set */
1522 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1523 COSTS_N_INSNS (56), /* HI */
1524 COSTS_N_INSNS (56), /* SI */
1525 COSTS_N_INSNS (56), /* DI */
1526 COSTS_N_INSNS (56)}, /* other */
1527 COSTS_N_INSNS (1), /* cost of movsx */
1528 COSTS_N_INSNS (1), /* cost of movzx */
1529 16, /* "large" insn */
1530 6, /* MOVE_RATIO */
1531 2, /* cost for loading QImode using movzbl */
1532 {4, 5, 4}, /* cost of loading integer registers
1533 in QImode, HImode and SImode.
1534 Relative to reg-reg move (2). */
1535 {2, 3, 2}, /* cost of storing integer registers */
1536 2, /* cost of reg,reg fld/fst */
1537 {2, 2, 6}, /* cost of loading fp registers
1538 in SFmode, DFmode and XFmode */
1539 {4, 4, 6}, /* cost of storing fp registers
1540 in SFmode, DFmode and XFmode */
1541 2, /* cost of moving MMX register */
1542 {2, 2}, /* cost of loading MMX registers
1543 in SImode and DImode */
1544 {2, 2}, /* cost of storing MMX registers
1545 in SImode and DImode */
1546 12, /* cost of moving SSE register */
1547 {12, 12, 12}, /* cost of loading SSE registers
1548 in SImode, DImode and TImode */
1549 {2, 2, 8}, /* cost of storing SSE registers
1550 in SImode, DImode and TImode */
1551 10, /* MMX or SSE register to integer */
1552 8, /* size of l1 cache. */
1553 256, /* size of l2 cache. */
1554 64, /* size of prefetch block */
1555 6, /* number of parallel prefetches */
1556 2, /* Branch cost */
1557 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1558 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1559 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1560 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1561 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1562 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1563 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1564 DUMMY_STRINGOP_ALGS},
1565 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1566 {-1, libcall}}},
1567 DUMMY_STRINGOP_ALGS},
1568 1, /* scalar_stmt_cost. */
1569 1, /* scalar load_cost. */
1570 1, /* scalar_store_cost. */
1571 1, /* vec_stmt_cost. */
1572 1, /* vec_to_scalar_cost. */
1573 1, /* scalar_to_vec_cost. */
1574 1, /* vec_align_load_cost. */
1575 2, /* vec_unalign_load_cost. */
1576 1, /* vec_store_cost. */
1577 3, /* cond_taken_branch_cost. */
1578 1, /* cond_not_taken_branch_cost. */
1579 };
1580
1581 static const
1582 struct processor_costs nocona_cost = {
1583 COSTS_N_INSNS (1), /* cost of an add instruction */
1584 COSTS_N_INSNS (1), /* cost of a lea instruction */
1585 COSTS_N_INSNS (1), /* variable shift costs */
1586 COSTS_N_INSNS (1), /* constant shift costs */
1587 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1588 COSTS_N_INSNS (10), /* HI */
1589 COSTS_N_INSNS (10), /* SI */
1590 COSTS_N_INSNS (10), /* DI */
1591 COSTS_N_INSNS (10)}, /* other */
1592 0, /* cost of multiply per each bit set */
1593 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1594 COSTS_N_INSNS (66), /* HI */
1595 COSTS_N_INSNS (66), /* SI */
1596 COSTS_N_INSNS (66), /* DI */
1597 COSTS_N_INSNS (66)}, /* other */
1598 COSTS_N_INSNS (1), /* cost of movsx */
1599 COSTS_N_INSNS (1), /* cost of movzx */
1600 16, /* "large" insn */
1601 17, /* MOVE_RATIO */
1602 4, /* cost for loading QImode using movzbl */
1603 {4, 4, 4}, /* cost of loading integer registers
1604 in QImode, HImode and SImode.
1605 Relative to reg-reg move (2). */
1606 {4, 4, 4}, /* cost of storing integer registers */
1607 3, /* cost of reg,reg fld/fst */
1608 {12, 12, 12}, /* cost of loading fp registers
1609 in SFmode, DFmode and XFmode */
1610 {4, 4, 4}, /* cost of storing fp registers
1611 in SFmode, DFmode and XFmode */
1612 6, /* cost of moving MMX register */
1613 {12, 12}, /* cost of loading MMX registers
1614 in SImode and DImode */
1615 {12, 12}, /* cost of storing MMX registers
1616 in SImode and DImode */
1617 6, /* cost of moving SSE register */
1618 {12, 12, 12}, /* cost of loading SSE registers
1619 in SImode, DImode and TImode */
1620 {12, 12, 12}, /* cost of storing SSE registers
1621 in SImode, DImode and TImode */
1622 8, /* MMX or SSE register to integer */
1623 8, /* size of l1 cache. */
1624 1024, /* size of l2 cache. */
1625 128, /* size of prefetch block */
1626 8, /* number of parallel prefetches */
1627 1, /* Branch cost */
1628 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1629 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1630 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1631 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1632 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1633 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1634 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1635 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1636 {100000, unrolled_loop}, {-1, libcall}}}},
1637 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1638 {-1, libcall}}},
1639 {libcall, {{24, loop}, {64, unrolled_loop},
1640 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1641 1, /* scalar_stmt_cost. */
1642 1, /* scalar load_cost. */
1643 1, /* scalar_store_cost. */
1644 1, /* vec_stmt_cost. */
1645 1, /* vec_to_scalar_cost. */
1646 1, /* scalar_to_vec_cost. */
1647 1, /* vec_align_load_cost. */
1648 2, /* vec_unalign_load_cost. */
1649 1, /* vec_store_cost. */
1650 3, /* cond_taken_branch_cost. */
1651 1, /* cond_not_taken_branch_cost. */
1652 };
1653
1654 static const
1655 struct processor_costs atom_cost = {
1656 COSTS_N_INSNS (1), /* cost of an add instruction */
1657 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1658 COSTS_N_INSNS (1), /* variable shift costs */
1659 COSTS_N_INSNS (1), /* constant shift costs */
1660 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1661 COSTS_N_INSNS (4), /* HI */
1662 COSTS_N_INSNS (3), /* SI */
1663 COSTS_N_INSNS (4), /* DI */
1664 COSTS_N_INSNS (2)}, /* other */
1665 0, /* cost of multiply per each bit set */
1666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1667 COSTS_N_INSNS (26), /* HI */
1668 COSTS_N_INSNS (42), /* SI */
1669 COSTS_N_INSNS (74), /* DI */
1670 COSTS_N_INSNS (74)}, /* other */
1671 COSTS_N_INSNS (1), /* cost of movsx */
1672 COSTS_N_INSNS (1), /* cost of movzx */
1673 8, /* "large" insn */
1674 17, /* MOVE_RATIO */
1675 2, /* cost for loading QImode using movzbl */
1676 {4, 4, 4}, /* cost of loading integer registers
1677 in QImode, HImode and SImode.
1678 Relative to reg-reg move (2). */
1679 {4, 4, 4}, /* cost of storing integer registers */
1680 4, /* cost of reg,reg fld/fst */
1681 {12, 12, 12}, /* cost of loading fp registers
1682 in SFmode, DFmode and XFmode */
1683 {6, 6, 8}, /* cost of storing fp registers
1684 in SFmode, DFmode and XFmode */
1685 2, /* cost of moving MMX register */
1686 {8, 8}, /* cost of loading MMX registers
1687 in SImode and DImode */
1688 {8, 8}, /* cost of storing MMX registers
1689 in SImode and DImode */
1690 2, /* cost of moving SSE register */
1691 {8, 8, 8}, /* cost of loading SSE registers
1692 in SImode, DImode and TImode */
1693 {8, 8, 8}, /* cost of storing SSE registers
1694 in SImode, DImode and TImode */
1695 5, /* MMX or SSE register to integer */
1696 32, /* size of l1 cache. */
1697 256, /* size of l2 cache. */
1698 64, /* size of prefetch block */
1699 6, /* number of parallel prefetches */
1700 3, /* Branch cost */
1701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1702 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1703 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1704 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1705 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1706 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1707 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1708 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1709 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1710 {{libcall, {{8, loop}, {15, unrolled_loop},
1711 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1712 {libcall, {{24, loop}, {32, unrolled_loop},
1713 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1714 1, /* scalar_stmt_cost. */
1715 1, /* scalar load_cost. */
1716 1, /* scalar_store_cost. */
1717 1, /* vec_stmt_cost. */
1718 1, /* vec_to_scalar_cost. */
1719 1, /* scalar_to_vec_cost. */
1720 1, /* vec_align_load_cost. */
1721 2, /* vec_unalign_load_cost. */
1722 1, /* vec_store_cost. */
1723 3, /* cond_taken_branch_cost. */
1724 1, /* cond_not_taken_branch_cost. */
1725 };
1726
1727 /* Generic64 should produce code tuned for Nocona and K8. */
1728 static const
1729 struct processor_costs generic64_cost = {
1730 COSTS_N_INSNS (1), /* cost of an add instruction */
1731 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1732 this cost, however, our current implementation of synth_mult results in
1733 the use of unnecessary temporary registers, causing regressions on several
1734 SPECfp benchmarks. */
1735 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1736 COSTS_N_INSNS (1), /* variable shift costs */
1737 COSTS_N_INSNS (1), /* constant shift costs */
1738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1739 COSTS_N_INSNS (4), /* HI */
1740 COSTS_N_INSNS (3), /* SI */
1741 COSTS_N_INSNS (4), /* DI */
1742 COSTS_N_INSNS (2)}, /* other */
1743 0, /* cost of multiply per each bit set */
1744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1745 COSTS_N_INSNS (26), /* HI */
1746 COSTS_N_INSNS (42), /* SI */
1747 COSTS_N_INSNS (74), /* DI */
1748 COSTS_N_INSNS (74)}, /* other */
1749 COSTS_N_INSNS (1), /* cost of movsx */
1750 COSTS_N_INSNS (1), /* cost of movzx */
1751 8, /* "large" insn */
1752 17, /* MOVE_RATIO */
1753 4, /* cost for loading QImode using movzbl */
1754 {4, 4, 4}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {4, 4, 4}, /* cost of storing integer registers */
1758 4, /* cost of reg,reg fld/fst */
1759 {12, 12, 12}, /* cost of loading fp registers
1760 in SFmode, DFmode and XFmode */
1761 {6, 6, 8}, /* cost of storing fp registers
1762 in SFmode, DFmode and XFmode */
1763 2, /* cost of moving MMX register */
1764 {8, 8}, /* cost of loading MMX registers
1765 in SImode and DImode */
1766 {8, 8}, /* cost of storing MMX registers
1767 in SImode and DImode */
1768 2, /* cost of moving SSE register */
1769 {8, 8, 8}, /* cost of loading SSE registers
1770 in SImode, DImode and TImode */
1771 {8, 8, 8}, /* cost of storing SSE registers
1772 in SImode, DImode and TImode */
1773 5, /* MMX or SSE register to integer */
1774 32, /* size of l1 cache. */
1775 512, /* size of l2 cache. */
1776 64, /* size of prefetch block */
1777 6, /* number of parallel prefetches */
1778 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1779 value is increased to the perhaps more appropriate value of 5. */
1780 3, /* Branch cost */
1781 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1782 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1783 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1784 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1785 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1786 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1787 {DUMMY_STRINGOP_ALGS,
1788 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1789 {DUMMY_STRINGOP_ALGS,
1790 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1791 1, /* scalar_stmt_cost. */
1792 1, /* scalar load_cost. */
1793 1, /* scalar_store_cost. */
1794 1, /* vec_stmt_cost. */
1795 1, /* vec_to_scalar_cost. */
1796 1, /* scalar_to_vec_cost. */
1797 1, /* vec_align_load_cost. */
1798 2, /* vec_unalign_load_cost. */
1799 1, /* vec_store_cost. */
1800 3, /* cond_taken_branch_cost. */
1801 1, /* cond_not_taken_branch_cost. */
1802 };
1803
1804 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1805 Athlon and K8. */
1806 static const
1807 struct processor_costs generic32_cost = {
1808 COSTS_N_INSNS (1), /* cost of an add instruction */
1809 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1810 COSTS_N_INSNS (1), /* variable shift costs */
1811 COSTS_N_INSNS (1), /* constant shift costs */
1812 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1813 COSTS_N_INSNS (4), /* HI */
1814 COSTS_N_INSNS (3), /* SI */
1815 COSTS_N_INSNS (4), /* DI */
1816 COSTS_N_INSNS (2)}, /* other */
1817 0, /* cost of multiply per each bit set */
1818 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1819 COSTS_N_INSNS (26), /* HI */
1820 COSTS_N_INSNS (42), /* SI */
1821 COSTS_N_INSNS (74), /* DI */
1822 COSTS_N_INSNS (74)}, /* other */
1823 COSTS_N_INSNS (1), /* cost of movsx */
1824 COSTS_N_INSNS (1), /* cost of movzx */
1825 8, /* "large" insn */
1826 17, /* MOVE_RATIO */
1827 4, /* cost for loading QImode using movzbl */
1828 {4, 4, 4}, /* cost of loading integer registers
1829 in QImode, HImode and SImode.
1830 Relative to reg-reg move (2). */
1831 {4, 4, 4}, /* cost of storing integer registers */
1832 4, /* cost of reg,reg fld/fst */
1833 {12, 12, 12}, /* cost of loading fp registers
1834 in SFmode, DFmode and XFmode */
1835 {6, 6, 8}, /* cost of storing fp registers
1836 in SFmode, DFmode and XFmode */
1837 2, /* cost of moving MMX register */
1838 {8, 8}, /* cost of loading MMX registers
1839 in SImode and DImode */
1840 {8, 8}, /* cost of storing MMX registers
1841 in SImode and DImode */
1842 2, /* cost of moving SSE register */
1843 {8, 8, 8}, /* cost of loading SSE registers
1844 in SImode, DImode and TImode */
1845 {8, 8, 8}, /* cost of storing SSE registers
1846 in SImode, DImode and TImode */
1847 5, /* MMX or SSE register to integer */
1848 32, /* size of l1 cache. */
1849 256, /* size of l2 cache. */
1850 64, /* size of prefetch block */
1851 6, /* number of parallel prefetches */
1852 3, /* Branch cost */
1853 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1854 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1855 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1856 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1857 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1858 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1859 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1860 DUMMY_STRINGOP_ALGS},
1861 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1862 DUMMY_STRINGOP_ALGS},
1863 1, /* scalar_stmt_cost. */
1864 1, /* scalar load_cost. */
1865 1, /* scalar_store_cost. */
1866 1, /* vec_stmt_cost. */
1867 1, /* vec_to_scalar_cost. */
1868 1, /* scalar_to_vec_cost. */
1869 1, /* vec_align_load_cost. */
1870 2, /* vec_unalign_load_cost. */
1871 1, /* vec_store_cost. */
1872 3, /* cond_taken_branch_cost. */
1873 1, /* cond_not_taken_branch_cost. */
1874 };
1875
1876 const struct processor_costs *ix86_cost = &pentium_cost;
1877
1878 /* Processor feature/optimization bitmasks. */
1879 #define m_386 (1<<PROCESSOR_I386)
1880 #define m_486 (1<<PROCESSOR_I486)
1881 #define m_PENT (1<<PROCESSOR_PENTIUM)
1882 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1883 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1884 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1885 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1886 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1887 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1888 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1889 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1890 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1891 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1892 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1893 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1894 #define m_ATOM (1<<PROCESSOR_ATOM)
1895
1896 #define m_GEODE (1<<PROCESSOR_GEODE)
1897 #define m_K6 (1<<PROCESSOR_K6)
1898 #define m_K6_GEODE (m_K6 | m_GEODE)
1899 #define m_K8 (1<<PROCESSOR_K8)
1900 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1903 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1904 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1905 #define m_BDVER (m_BDVER1 | m_BDVER2)
1906 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1907 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1908
1909 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1910 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1911
1912 /* Generic instruction choice should be a common subset of supported CPUs
1913 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1914 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1915
1916 /* Feature tests against the various tunings. */
1917 unsigned char ix86_tune_features[X86_TUNE_LAST];
1918
1919 /* Feature tests against the various tunings used to create ix86_tune_features
1920 based on the processor mask. */
1921 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1922 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1923 negatively, so enabling it for Generic64 seems like a good code-size
1924 tradeoff. We can't enable it for 32bit generic because it does not
1925 work well with PPro based chips. */
1926 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1927
1928 /* X86_TUNE_PUSH_MEMORY */
1929 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1930
1931 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1932 m_486 | m_PENT,
1933
1934 /* X86_TUNE_UNROLL_STRLEN */
1935 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1936
1937 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1938 on simulation results. But after P4 was made, no performance benefit
1939 was observed with branch hints; they also increase the code size.
1940 As a result, icc never generates branch hints. */
1941 0,
1942
1943 /* X86_TUNE_DOUBLE_WITH_ADD */
1944 ~m_386,
1945
1946 /* X86_TUNE_USE_SAHF */
1947 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1948
1949 /* X86_TUNE_MOVX: Enable zero extending of integer registers to avoid
1950 partial dependencies. */
1951 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1952
1953 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1954 register stalls on the Generic32 compilation setting as well. However,
1955 in the current implementation the partial register stalls are not eliminated
1956 very well - they can be introduced via subregs synthesized by combine
1957 and can happen in caller/callee saving sequences. Because this option
1958 pays back little on PPro based chips and conflicts with the partial reg
1959 dependencies used by Athlon/P4 based chips, it is better to leave it off
1960 for generic32 for now. */
1961 m_PPRO,
1962
1963 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1964 m_CORE2I7 | m_GENERIC,
1965
1966 /* X86_TUNE_USE_HIMODE_FIOP */
1967 m_386 | m_486 | m_K6_GEODE,
1968
1969 /* X86_TUNE_USE_SIMODE_FIOP */
1970 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1971
1972 /* X86_TUNE_USE_MOV0 */
1973 m_K6,
1974
1975 /* X86_TUNE_USE_CLTD */
1976 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1977
1978 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1979 m_PENT4,
1980
1981 /* X86_TUNE_SPLIT_LONG_MOVES */
1982 m_PPRO,
1983
1984 /* X86_TUNE_READ_MODIFY_WRITE */
1985 ~m_PENT,
1986
1987 /* X86_TUNE_READ_MODIFY */
1988 ~(m_PENT | m_PPRO),
1989
1990 /* X86_TUNE_PROMOTE_QIMODE */
1991 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1992
1993 /* X86_TUNE_FAST_PREFIX */
1994 ~(m_386 | m_486 | m_PENT),
1995
1996 /* X86_TUNE_SINGLE_STRINGOP */
1997 m_386 | m_P4_NOCONA,
1998
1999 /* X86_TUNE_QIMODE_MATH */
2000 ~0,
2001
2002 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2003 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2004 might be considered for Generic32 if our scheme for avoiding partial
2005 stalls was more effective. */
2006 ~m_PPRO,
2007
2008 /* X86_TUNE_PROMOTE_QI_REGS */
2009 0,
2010
2011 /* X86_TUNE_PROMOTE_HI_REGS */
2012 m_PPRO,
2013
2014 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2015 over esp addition. */
2016 m_386 | m_486 | m_PENT | m_PPRO,
2017
2018 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2019 over esp addition. */
2020 m_PENT,
2021
2022 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2023 over esp subtraction. */
2024 m_386 | m_486 | m_PENT | m_K6_GEODE,
2025
2026 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
2027 over esp subtraction. */
2028 m_PENT | m_K6_GEODE,
2029
2030 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2031 for DFmode copies */
2032 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2033
2034 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2035 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2036
2037 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2038 conflict here between PPro/Pentium4 based chips that treat 128bit
2039 SSE registers as single units versus K8 based chips that divide SSE
2040 registers into two 64bit halves. This knob promotes all store destinations
2041 to be 128bit to allow register renaming on 128bit SSE units, but usually
2042 results in one extra microop on 64bit SSE units. Experimental results
2043 show that disabling this option on P4 brings over 20% SPECfp regression,
2044 while enabling it on K8 brings roughly 2.4% regression that can be partly
2045 masked by careful scheduling of moves. */
2046 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2047
2048 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2049 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2050
2051 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2052 m_COREI7 | m_BDVER,
2053
2054 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2055 m_BDVER,
2056
2057 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2058 are resolved on SSE register parts instead of whole registers, so we may
2059 maintain just the lower part of scalar values in the proper format, leaving
2060 the upper part undefined. */
2061 m_ATHLON_K8,
2062
2063 /* X86_TUNE_SSE_TYPELESS_STORES */
2064 m_AMD_MULTIPLE,
2065
2066 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2067 m_PPRO | m_P4_NOCONA,
2068
2069 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2070 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2071
2072 /* X86_TUNE_PROLOGUE_USING_MOVE */
2073 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2074
2075 /* X86_TUNE_EPILOGUE_USING_MOVE */
2076 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2077
2078 /* X86_TUNE_SHIFT1 */
2079 ~m_486,
2080
2081 /* X86_TUNE_USE_FFREEP */
2082 m_AMD_MULTIPLE,
2083
2084 /* X86_TUNE_INTER_UNIT_MOVES */
2085 ~(m_AMD_MULTIPLE | m_GENERIC),
2086
2087 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2088 ~(m_AMDFAM10 | m_BDVER),
2089
2090 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2091 than 4 branch instructions in the 16 byte window. */
2092 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2093
2094 /* X86_TUNE_SCHEDULE */
2095 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2096
2097 /* X86_TUNE_USE_BT */
2098 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2099
2100 /* X86_TUNE_USE_INCDEC */
2101 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2102
2103 /* X86_TUNE_PAD_RETURNS */
2104 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2105
2106 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2107 m_ATOM,
2108
2109 /* X86_TUNE_EXT_80387_CONSTANTS */
2110 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2111
2112 /* X86_TUNE_SHORTEN_X87_SSE */
2113 ~m_K8,
2114
2115 /* X86_TUNE_AVOID_VECTOR_DECODE */
2116 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2117
2118 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2119 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2120 ~(m_386 | m_486),
2121
2122 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory takes
2123 the vector path on AMD machines. */
2124 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2125
2126 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant takes the vector path
2127 on AMD machines. */
2128 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2129
2130 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2131 than a MOV. */
2132 m_PENT,
2133
2134 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2135 but one byte longer. */
2136 m_PENT,
2137
2138 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2139 operand that cannot be represented using a modRM byte. The XOR
2140 replacement is long decoded, so this split helps here as well. */
2141 m_K6,
2142
2143 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2144 from FP to FP. */
2145 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2146
2147 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2148 from integer to FP. */
2149 m_AMDFAM10,
2150
2151 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2152 with a subsequent conditional jump instruction into a single
2153 compare-and-branch uop. */
2154 m_BDVER,
2155
2156 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2157 will impact LEA instruction selection. */
2158 m_ATOM,
2159
2160 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2161 instructions. */
2162 ~m_ATOM,
2163
2164 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2165 at -O3. For the moment, the prefetching seems badly tuned for Intel
2166 chips. */
2167 m_K6_GEODE | m_AMD_MULTIPLE,
2168
2169 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2170 the auto-vectorizer. */
2171 m_BDVER
2172 };
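/* Illustrative sketch only: the bitmask table above is typically folded
   into the boolean ix86_tune_features[] array by testing every entry
   against the single bit of the selected tuning target.  The function
   name below is hypothetical and the snippet is compiled out.  */
#if 0
static void
example_init_tune_features (unsigned int tune_mask)
{
  int i;
  /* tune_mask would be (1u << ix86_tune) for the chosen processor.  */
  for (i = 0; i < X86_TUNE_LAST; i++)
    ix86_tune_features[i]
      = (initial_ix86_tune_features[i] & tune_mask) != 0;
}
#endif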
2173
2174 /* Feature tests against the various architecture variations. */
2175 unsigned char ix86_arch_features[X86_ARCH_LAST];
2176
2177 /* Feature tests against the various architecture variations, used to create
2178 ix86_arch_features based on the processor mask. */
2179 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2180 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2181 ~(m_386 | m_486 | m_PENT | m_K6),
2182
2183 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2184 ~m_386,
2185
2186 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2187 ~(m_386 | m_486),
2188
2189 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2190 ~m_386,
2191
2192 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2193 ~m_386,
2194 };
2195
2196 static const unsigned int x86_accumulate_outgoing_args
2197 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2198
2199 static const unsigned int x86_arch_always_fancy_math_387
2200 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2201
2202 static const unsigned int x86_avx256_split_unaligned_load
2203 = m_COREI7 | m_GENERIC;
2204
2205 static const unsigned int x86_avx256_split_unaligned_store
2206 = m_COREI7 | m_BDVER | m_GENERIC;
2207
2208 /* In case the average insn count for a single function invocation is
2209 lower than this constant, emit fast (but longer) prologue and
2210 epilogue code. */
2211 #define FAST_PROLOGUE_INSN_COUNT 20
2212
2213 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2214 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2215 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2216 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2217
2218 /* Array of the smallest class containing reg number REGNO, indexed by
2219 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2220
2221 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2222 {
2223 /* ax, dx, cx, bx */
2224 AREG, DREG, CREG, BREG,
2225 /* si, di, bp, sp */
2226 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2227 /* FP registers */
2228 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2229 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2230 /* arg pointer */
2231 NON_Q_REGS,
2232 /* flags, fpsr, fpcr, frame */
2233 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2234 /* SSE registers */
2235 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2236 SSE_REGS, SSE_REGS,
2237 /* MMX registers */
2238 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2239 MMX_REGS, MMX_REGS,
2240 /* REX registers */
2241 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2242 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2243 /* SSE REX registers */
2244 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2245 SSE_REGS, SSE_REGS,
2246 };
2247
2248 /* The "default" register map used in 32bit mode. */
2249
2250 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2251 {
2252 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2253 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2254 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2255 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2256 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2257 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2258 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2259 };
2260
2261 /* The "default" register map used in 64bit mode. */
2262
2263 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2264 {
2265 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2266 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2267 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2268 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2269 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2270 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2271 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2272 };
2273
2274 /* Define the register numbers to be used in Dwarf debugging information.
2275 The SVR4 reference port C compiler uses the following register numbers
2276 in its Dwarf output code:
2277 0 for %eax (gcc regno = 0)
2278 1 for %ecx (gcc regno = 2)
2279 2 for %edx (gcc regno = 1)
2280 3 for %ebx (gcc regno = 3)
2281 4 for %esp (gcc regno = 7)
2282 5 for %ebp (gcc regno = 6)
2283 6 for %esi (gcc regno = 4)
2284 7 for %edi (gcc regno = 5)
2285 The following three DWARF register numbers are never generated by
2286 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2287 believes these numbers have these meanings.
2288 8 for %eip (no gcc equivalent)
2289 9 for %eflags (gcc regno = 17)
2290 10 for %trapno (no gcc equivalent)
2291 It is not at all clear how we should number the FP stack registers
2292 for the x86 architecture. If the version of SDB on x86/svr4 were
2293 a bit less brain dead with respect to floating-point then we would
2294 have a precedent to follow with respect to DWARF register numbers
2295 for x86 FP registers, but the SDB on x86/svr4 is so completely
2296 broken with respect to FP registers that it is hardly worth thinking
2297 of it as something to strive for compatibility with.
2298 The version of x86/svr4 SDB I have at the moment does (partially)
2299 seem to believe that DWARF register number 11 is associated with
2300 the x86 register %st(0), but that's about all. Higher DWARF
2301 register numbers don't seem to be associated with anything in
2302 particular, and even for DWARF regno 11, SDB only seems to under-
2303 stand that it should say that a variable lives in %st(0) (when
2304 asked via an `=' command) if we said it was in DWARF regno 11,
2305 but SDB still prints garbage when asked for the value of the
2306 variable in question (via a `/' command).
2307 (Also note that the labels SDB prints for various FP stack regs
2308 when doing an `x' command are all wrong.)
2309 Note that these problems generally don't affect the native SVR4
2310 C compiler because it doesn't allow the use of -O with -g and
2311 because when it is *not* optimizing, it allocates a memory
2312 location for each floating-point variable, and the memory
2313 location is what gets described in the DWARF AT_location
2314 attribute for the variable in question.
2315 Regardless of the severe mental illness of the x86/svr4 SDB, we
2316 do something sensible here and we use the following DWARF
2317 register numbers. Note that these are all stack-top-relative
2318 numbers.
2319 11 for %st(0) (gcc regno = 8)
2320 12 for %st(1) (gcc regno = 9)
2321 13 for %st(2) (gcc regno = 10)
2322 14 for %st(3) (gcc regno = 11)
2323 15 for %st(4) (gcc regno = 12)
2324 16 for %st(5) (gcc regno = 13)
2325 17 for %st(6) (gcc regno = 14)
2326 18 for %st(7) (gcc regno = 15)
2327 */
2328 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2329 {
2330 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2331 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2332 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2333 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2334 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2335 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2336 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2337 };
2338
2339 /* Define parameter passing and return registers. */
2340
2341 static int const x86_64_int_parameter_registers[6] =
2342 {
2343 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2344 };
2345
2346 static int const x86_64_ms_abi_int_parameter_registers[4] =
2347 {
2348 CX_REG, DX_REG, R8_REG, R9_REG
2349 };
2350
2351 static int const x86_64_int_return_registers[4] =
2352 {
2353 AX_REG, DX_REG, DI_REG, SI_REG
2354 };
2355
2356 /* Define the structure for the machine field in struct function. */
2357
2358 struct GTY(()) stack_local_entry {
2359 unsigned short mode;
2360 unsigned short n;
2361 rtx rtl;
2362 struct stack_local_entry *next;
2363 };
2364
2365 /* Structure describing stack frame layout.
2366 Stack grows downward:
2367
2368 [arguments]
2369 <- ARG_POINTER
2370 saved pc
2371
2372 saved static chain if ix86_static_chain_on_stack
2373
2374 saved frame pointer if frame_pointer_needed
2375 <- HARD_FRAME_POINTER
2376 [saved regs]
2377 <- regs_save_offset
2378 [padding0]
2379
2380 [saved SSE regs]
2381 <- sse_regs_save_offset
2382 [padding1] |
2383 | <- FRAME_POINTER
2384 [va_arg registers] |
2385 |
2386 [frame] |
2387 |
2388 [padding2] | = to_allocate
2389 <- STACK_POINTER
2390 */
2391 struct ix86_frame
2392 {
2393 int nsseregs;
2394 int nregs;
2395 int va_arg_size;
2396 int red_zone_size;
2397 int outgoing_arguments_size;
2398 HOST_WIDE_INT frame;
2399
2400 /* The offsets relative to ARG_POINTER. */
2401 HOST_WIDE_INT frame_pointer_offset;
2402 HOST_WIDE_INT hard_frame_pointer_offset;
2403 HOST_WIDE_INT stack_pointer_offset;
2404 HOST_WIDE_INT hfp_save_offset;
2405 HOST_WIDE_INT reg_save_offset;
2406 HOST_WIDE_INT sse_reg_save_offset;
2407
2408 /* When save_regs_using_mov is set, emit prologue using
2409 move instead of push instructions. */
2410 bool save_regs_using_mov;
2411 };
2412
2413 /* Which cpu are we scheduling for. */
2414 enum attr_cpu ix86_schedule;
2415
2416 /* Which cpu are we optimizing for. */
2417 enum processor_type ix86_tune;
2418
2419 /* Which instruction set architecture to use. */
2420 enum processor_type ix86_arch;
2421
2422 /* True if the SSE prefetch instruction is not a NOP. */
2423 int x86_prefetch_sse;
2424
2425 /* -mstackrealign option */
2426 static const char ix86_force_align_arg_pointer_string[]
2427 = "force_align_arg_pointer";
2428
2429 static rtx (*ix86_gen_leave) (void);
2430 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2431 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2432 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2433 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2434 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2435 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2436 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2437 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2438 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2439
2440 /* Preferred alignment for stack boundary in bits. */
2441 unsigned int ix86_preferred_stack_boundary;
2442
2443 /* Alignment for incoming stack boundary in bits specified at
2444 command line. */
2445 static unsigned int ix86_user_incoming_stack_boundary;
2446
2447 /* Default alignment for incoming stack boundary in bits. */
2448 static unsigned int ix86_default_incoming_stack_boundary;
2449
2450 /* Alignment for incoming stack boundary in bits. */
2451 unsigned int ix86_incoming_stack_boundary;
2452
2453 /* Calling abi specific va_list type nodes. */
2454 static GTY(()) tree sysv_va_list_type_node;
2455 static GTY(()) tree ms_va_list_type_node;
2456
2457 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2458 char internal_label_prefix[16];
2459 int internal_label_prefix_len;
2460
2461 /* Fence to use after loop using movnt. */
2462 tree x86_mfence;
2463
2464 /* Register class used for passing a given 64bit part of the argument.
2465 These represent classes as documented by the psABI, with the exception
2466 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
2467 just uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2468 
2469 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2470 whenever possible (the upper half does contain padding). */
2471 enum x86_64_reg_class
2472 {
2473 X86_64_NO_CLASS,
2474 X86_64_INTEGER_CLASS,
2475 X86_64_INTEGERSI_CLASS,
2476 X86_64_SSE_CLASS,
2477 X86_64_SSESF_CLASS,
2478 X86_64_SSEDF_CLASS,
2479 X86_64_SSEUP_CLASS,
2480 X86_64_X87_CLASS,
2481 X86_64_X87UP_CLASS,
2482 X86_64_COMPLEX_X87_CLASS,
2483 X86_64_MEMORY_CLASS
2484 };
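/* Worked example (informal, following the psABI rules referenced above):
   a parameter of type  struct { double d; int i; }  spans two eightbytes;
   the first would be classified X86_64_SSEDF_CLASS (a lone double, moved
   in DFmode) and the second X86_64_INTEGERSI_CLASS (a lone int, moved in
   SImode since the upper half is only padding).  */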
2485
2486 #define MAX_CLASSES 4
2487
2488 /* Table of constants used by fldpi, fldln2, etc.... */
2489 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2490 static bool ext_80387_constants_init = 0;
2491
2492 \f
2493 static struct machine_function * ix86_init_machine_status (void);
2494 static rtx ix86_function_value (const_tree, const_tree, bool);
2495 static bool ix86_function_value_regno_p (const unsigned int);
2496 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2497 const_tree);
2498 static rtx ix86_static_chain (const_tree, bool);
2499 static int ix86_function_regparm (const_tree, const_tree);
2500 static void ix86_compute_frame_layout (struct ix86_frame *);
2501 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2502 rtx, rtx, int);
2503 static void ix86_add_new_builtins (int);
2504 static rtx ix86_expand_vec_perm_builtin (tree);
2505 static tree ix86_canonical_va_list_type (tree);
2506 static void predict_jump (int);
2507 static unsigned int split_stack_prologue_scratch_regno (void);
2508 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2509
2510 enum ix86_function_specific_strings
2511 {
2512 IX86_FUNCTION_SPECIFIC_ARCH,
2513 IX86_FUNCTION_SPECIFIC_TUNE,
2514 IX86_FUNCTION_SPECIFIC_MAX
2515 };
2516
2517 static char *ix86_target_string (int, int, const char *, const char *,
2518 enum fpmath_unit, bool);
2519 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2520 static void ix86_function_specific_save (struct cl_target_option *);
2521 static void ix86_function_specific_restore (struct cl_target_option *);
2522 static void ix86_function_specific_print (FILE *, int,
2523 struct cl_target_option *);
2524 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2525 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2526 struct gcc_options *);
2527 static bool ix86_can_inline_p (tree, tree);
2528 static void ix86_set_current_function (tree);
2529 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2530
2531 static enum calling_abi ix86_function_abi (const_tree);
2532
2533 \f
2534 #ifndef SUBTARGET32_DEFAULT_CPU
2535 #define SUBTARGET32_DEFAULT_CPU "i386"
2536 #endif
2537
2538 /* The svr4 ABI for the i386 says that records and unions are returned
2539 in memory. */
2540 #ifndef DEFAULT_PCC_STRUCT_RETURN
2541 #define DEFAULT_PCC_STRUCT_RETURN 1
2542 #endif
2543
2544 /* Whether -mtune= or -march= were specified */
2545 static int ix86_tune_defaulted;
2546 static int ix86_arch_specified;
2547
2548 /* Vectorization library interface and handlers. */
2549 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2550
2551 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2552 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2553
2554 /* Processor target table, indexed by processor number */
2555 struct ptt
2556 {
2557 const struct processor_costs *cost; /* Processor costs */
2558 const int align_loop; /* Default alignments. */
2559 const int align_loop_max_skip;
2560 const int align_jump;
2561 const int align_jump_max_skip;
2562 const int align_func;
2563 };
2564
2565 static const struct ptt processor_target_table[PROCESSOR_max] =
2566 {
2567 {&i386_cost, 4, 3, 4, 3, 4},
2568 {&i486_cost, 16, 15, 16, 15, 16},
2569 {&pentium_cost, 16, 7, 16, 7, 16},
2570 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2571 {&geode_cost, 0, 0, 0, 0, 0},
2572 {&k6_cost, 32, 7, 32, 7, 32},
2573 {&athlon_cost, 16, 7, 16, 7, 16},
2574 {&pentium4_cost, 0, 0, 0, 0, 0},
2575 {&k8_cost, 16, 7, 16, 7, 16},
2576 {&nocona_cost, 0, 0, 0, 0, 0},
2577 /* Core 2 32-bit. */
2578 {&generic32_cost, 16, 10, 16, 10, 16},
2579 /* Core 2 64-bit. */
2580 {&generic64_cost, 16, 10, 16, 10, 16},
2581 /* Core i7 32-bit. */
2582 {&generic32_cost, 16, 10, 16, 10, 16},
2583 /* Core i7 64-bit. */
2584 {&generic64_cost, 16, 10, 16, 10, 16},
2585 {&generic32_cost, 16, 7, 16, 7, 16},
2586 {&generic64_cost, 16, 10, 16, 10, 16},
2587 {&amdfam10_cost, 32, 24, 32, 7, 32},
2588 {&bdver1_cost, 32, 24, 32, 7, 32},
2589 {&bdver2_cost, 32, 24, 32, 7, 32},
2590 {&btver1_cost, 32, 24, 32, 7, 32},
2591 {&atom_cost, 16, 7, 16, 7, 16}
2592 };
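/* For illustration: the {&atom_cost, 16, 7, 16, 7, 16} entry above means
   that when tuning for Atom the atom_cost table supplies instruction
   costs, loops and jump targets are aligned to 16 bytes (but only when at
   most 7 bytes of padding are needed), and functions are aligned to
   16 bytes.  */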
2593
2594 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2595 {
2596 "generic",
2597 "i386",
2598 "i486",
2599 "pentium",
2600 "pentium-mmx",
2601 "pentiumpro",
2602 "pentium2",
2603 "pentium3",
2604 "pentium4",
2605 "pentium-m",
2606 "prescott",
2607 "nocona",
2608 "core2",
2609 "corei7",
2610 "atom",
2611 "geode",
2612 "k6",
2613 "k6-2",
2614 "k6-3",
2615 "athlon",
2616 "athlon-4",
2617 "k8",
2618 "amdfam10",
2619 "bdver1",
2620 "bdver2",
2621 "btver1"
2622 };
2623 \f
2624 /* Return true if a red-zone is in use. */
2625
2626 static inline bool
2627 ix86_using_red_zone (void)
2628 {
2629 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2630 }
2631 \f
2632 /* Return a string that documents the current -m options. The caller is
2633 responsible for freeing the string. */
2634
2635 static char *
2636 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2637 enum fpmath_unit fpmath, bool add_nl_p)
2638 {
2639 struct ix86_target_opts
2640 {
2641 const char *option; /* option string */
2642 int mask; /* isa mask options */
2643 };
2644
2645 /* This table is ordered so that options like -msse4.2, which imply
2646 the preceding options, are matched first. */
2647 static struct ix86_target_opts isa_opts[] =
2648 {
2649 { "-m64", OPTION_MASK_ISA_64BIT },
2650 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2651 { "-mfma", OPTION_MASK_ISA_FMA },
2652 { "-mxop", OPTION_MASK_ISA_XOP },
2653 { "-mlwp", OPTION_MASK_ISA_LWP },
2654 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2655 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2656 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2657 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2658 { "-msse3", OPTION_MASK_ISA_SSE3 },
2659 { "-msse2", OPTION_MASK_ISA_SSE2 },
2660 { "-msse", OPTION_MASK_ISA_SSE },
2661 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2662 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2663 { "-mmmx", OPTION_MASK_ISA_MMX },
2664 { "-mabm", OPTION_MASK_ISA_ABM },
2665 { "-mbmi", OPTION_MASK_ISA_BMI },
2666 { "-mtbm", OPTION_MASK_ISA_TBM },
2667 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2668 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2669 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2670 { "-maes", OPTION_MASK_ISA_AES },
2671 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2672 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2673 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2674 { "-mf16c", OPTION_MASK_ISA_F16C },
2675 };
2676
2677 /* Flag options. */
2678 static struct ix86_target_opts flag_opts[] =
2679 {
2680 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2681 { "-m80387", MASK_80387 },
2682 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2683 { "-malign-double", MASK_ALIGN_DOUBLE },
2684 { "-mcld", MASK_CLD },
2685 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2686 { "-mieee-fp", MASK_IEEE_FP },
2687 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2688 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2689 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2690 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2691 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2692 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2693 { "-mno-red-zone", MASK_NO_RED_ZONE },
2694 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2695 { "-mrecip", MASK_RECIP },
2696 { "-mrtd", MASK_RTD },
2697 { "-msseregparm", MASK_SSEREGPARM },
2698 { "-mstack-arg-probe", MASK_STACK_PROBE },
2699 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2700 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2701 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2702 { "-mvzeroupper", MASK_VZEROUPPER },
2703 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2704 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2705 { "-mprefer-avx128", MASK_PREFER_AVX128},
2706 };
2707
2708 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2709
2710 char isa_other[40];
2711 char target_other[40];
2712 unsigned num = 0;
2713 unsigned i, j;
2714 char *ret;
2715 char *ptr;
2716 size_t len;
2717 size_t line_len;
2718 size_t sep_len;
2719
2720 memset (opts, '\0', sizeof (opts));
2721
2722 /* Add -march= option. */
2723 if (arch)
2724 {
2725 opts[num][0] = "-march=";
2726 opts[num++][1] = arch;
2727 }
2728
2729 /* Add -mtune= option. */
2730 if (tune)
2731 {
2732 opts[num][0] = "-mtune=";
2733 opts[num++][1] = tune;
2734 }
2735
2736 /* Pick out the ISA options. */
2737 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2738 {
2739 if ((isa & isa_opts[i].mask) != 0)
2740 {
2741 opts[num++][0] = isa_opts[i].option;
2742 isa &= ~ isa_opts[i].mask;
2743 }
2744 }
2745
2746 if (isa && add_nl_p)
2747 {
2748 opts[num++][0] = isa_other;
2749 sprintf (isa_other, "(other isa: %#x)", isa);
2750 }
2751
2752 /* Add flag options. */
2753 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2754 {
2755 if ((flags & flag_opts[i].mask) != 0)
2756 {
2757 opts[num++][0] = flag_opts[i].option;
2758 flags &= ~ flag_opts[i].mask;
2759 }
2760 }
2761
2762 if (flags && add_nl_p)
2763 {
2764 opts[num++][0] = target_other;
2765 sprintf (target_other, "(other flags: %#x)", flags);
2766 }
2767
2768 /* Add -fpmath= option. */
2769 if (fpmath)
2770 {
2771 opts[num][0] = "-mfpmath=";
2772 switch ((int) fpmath)
2773 {
2774 case FPMATH_387:
2775 opts[num++][1] = "387";
2776 break;
2777
2778 case FPMATH_SSE:
2779 opts[num++][1] = "sse";
2780 break;
2781
2782 case FPMATH_387 | FPMATH_SSE:
2783 opts[num++][1] = "sse+387";
2784 break;
2785
2786 default:
2787 gcc_unreachable ();
2788 }
2789 }
2790
2791 /* Any options? */
2792 if (num == 0)
2793 return NULL;
2794
2795 gcc_assert (num < ARRAY_SIZE (opts));
2796
2797 /* Size the string. */
2798 len = 0;
2799 sep_len = (add_nl_p) ? 3 : 1;
2800 for (i = 0; i < num; i++)
2801 {
2802 len += sep_len;
2803 for (j = 0; j < 2; j++)
2804 if (opts[i][j])
2805 len += strlen (opts[i][j]);
2806 }
2807
2808 /* Build the string. */
2809 ret = ptr = (char *) xmalloc (len);
2810 line_len = 0;
2811
2812 for (i = 0; i < num; i++)
2813 {
2814 size_t len2[2];
2815
2816 for (j = 0; j < 2; j++)
2817 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2818
2819 if (i != 0)
2820 {
2821 *ptr++ = ' ';
2822 line_len++;
2823
2824 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2825 {
2826 *ptr++ = '\\';
2827 *ptr++ = '\n';
2828 line_len = 0;
2829 }
2830 }
2831
2832 for (j = 0; j < 2; j++)
2833 if (opts[i][j])
2834 {
2835 memcpy (ptr, opts[i][j], len2[j]);
2836 ptr += len2[j];
2837 line_len += len2[j];
2838 }
2839 }
2840
2841 *ptr = '\0';
2842 gcc_assert (ret + len >= ptr);
2843
2844 return ret;
2845 }
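/* For illustration (hypothetical input): on a 64-bit target configured
   with -march=x86-64 -msse2, the string built above would look roughly
   like "-march=x86-64 -mtune=generic -m64 -msse2 -msse -mmmx -mfpmath=sse":
   -march=/-mtune= come first, then the decomposed ISA options, then the
   flag options, then -mfpmath=, all separated by spaces, with a
   backslash-newline break whenever add_nl_p is set and a line would
   exceed 70 columns.  */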
2846
2847 /* Return true if profiling code should be emitted before the
2848 prologue, and false otherwise.
2849 Note: For x86 with "hotfix" it is sorried. */
2850 static bool
2851 ix86_profile_before_prologue (void)
2852 {
2853 return flag_fentry != 0;
2854 }
2855
2856 /* Function that is callable from the debugger to print the current
2857 options. */
2858 void
2859 ix86_debug_options (void)
2860 {
2861 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2862 ix86_arch_string, ix86_tune_string,
2863 ix86_fpmath, true);
2864
2865 if (opts)
2866 {
2867 fprintf (stderr, "%s\n\n", opts);
2868 free (opts);
2869 }
2870 else
2871 fputs ("<no options>\n\n", stderr);
2872
2873 return;
2874 }
2875 \f
2876 /* Override various settings based on options. If MAIN_ARGS_P, the
2877 options are from the command line, otherwise they are from
2878 attributes. */
2879
2880 static void
2881 ix86_option_override_internal (bool main_args_p)
2882 {
2883 int i;
2884 unsigned int ix86_arch_mask, ix86_tune_mask;
2885 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2886 const char *prefix;
2887 const char *suffix;
2888 const char *sw;
2889
2890 enum pta_flags
2891 {
2892 PTA_SSE = 1 << 0,
2893 PTA_SSE2 = 1 << 1,
2894 PTA_SSE3 = 1 << 2,
2895 PTA_MMX = 1 << 3,
2896 PTA_PREFETCH_SSE = 1 << 4,
2897 PTA_3DNOW = 1 << 5,
2898 PTA_3DNOW_A = 1 << 6,
2899 PTA_64BIT = 1 << 7,
2900 PTA_SSSE3 = 1 << 8,
2901 PTA_CX16 = 1 << 9,
2902 PTA_POPCNT = 1 << 10,
2903 PTA_ABM = 1 << 11,
2904 PTA_SSE4A = 1 << 12,
2905 PTA_NO_SAHF = 1 << 13,
2906 PTA_SSE4_1 = 1 << 14,
2907 PTA_SSE4_2 = 1 << 15,
2908 PTA_AES = 1 << 16,
2909 PTA_PCLMUL = 1 << 17,
2910 PTA_AVX = 1 << 18,
2911 PTA_FMA = 1 << 19,
2912 PTA_MOVBE = 1 << 20,
2913 PTA_FMA4 = 1 << 21,
2914 PTA_XOP = 1 << 22,
2915 PTA_LWP = 1 << 23,
2916 PTA_FSGSBASE = 1 << 24,
2917 PTA_RDRND = 1 << 25,
2918 PTA_F16C = 1 << 26,
2919 PTA_BMI = 1 << 27,
2920 PTA_TBM = 1 << 28
2921 /* if this reaches 32, need to widen struct pta flags below */
2922 };
2923
2924 static struct pta
2925 {
2926 const char *const name; /* processor name or nickname. */
2927 const enum processor_type processor;
2928 const enum attr_cpu schedule;
2929 const unsigned /*enum pta_flags*/ flags;
2930 }
2931 const processor_alias_table[] =
2932 {
2933 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2934 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2935 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2936 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2937 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2938 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2939 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2940 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2941 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2942 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2943 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2944 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2945 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2946 PTA_MMX | PTA_SSE},
2947 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2948 PTA_MMX | PTA_SSE},
2949 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2950 PTA_MMX | PTA_SSE | PTA_SSE2},
2951 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2952 PTA_MMX | PTA_SSE | PTA_SSE2},
2953 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2954 PTA_MMX | PTA_SSE | PTA_SSE2},
2955 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2956 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2957 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2958 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2959 | PTA_CX16 | PTA_NO_SAHF},
2960 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2961 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2962 | PTA_SSSE3 | PTA_CX16},
2963 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2964 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2965 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2966 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2967 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2968 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2969 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2970 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2971 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2972 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2973 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2974 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2975 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2976 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2977 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2978 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2979 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2980 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2981 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2982 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2983 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2984 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2985 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2986 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2987 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2988 {"x86-64", PROCESSOR_K8, CPU_K8,
2989 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2990 {"k8", PROCESSOR_K8, CPU_K8,
2991 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2992 | PTA_SSE2 | PTA_NO_SAHF},
2993 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2994 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2995 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2996 {"opteron", PROCESSOR_K8, CPU_K8,
2997 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2998 | PTA_SSE2 | PTA_NO_SAHF},
2999 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3000 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3001 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3002 {"athlon64", PROCESSOR_K8, CPU_K8,
3003 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3004 | PTA_SSE2 | PTA_NO_SAHF},
3005 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3006 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3007 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3008 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3009 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3010 | PTA_SSE2 | PTA_NO_SAHF},
3011 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3012 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3013 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3014 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3015 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3016 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3017 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3018 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3019 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3020 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3021 | PTA_XOP | PTA_LWP},
3022 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3023 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3024 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3025 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3026 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3027 | PTA_FMA},
3028 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3029 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3030 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3031 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3032 0 /* flags are only used for -march switch. */ },
3033 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3034 PTA_64BIT /* flags are only used for -march switch. */ },
3035 };
3036
3037 int const pta_size = ARRAY_SIZE (processor_alias_table);
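/* Worked example (informal): for -march=core2, the loop further below
   walks processor_alias_table, finds the "core2" entry, sets
   ix86_schedule, ix86_arch and (by default) ix86_tune from it, and then
   translates each PTA_* flag the user did not explicitly override into
   the matching ISA default, so e.g. MMX, SSE, SSE2, SSE3 and SSSE3 end
   up enabled.  */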
3038
3039 /* Set up prefix/suffix so the error messages refer to either the command
3040 line argument, or the attribute(target). */
3041 if (main_args_p)
3042 {
3043 prefix = "-m";
3044 suffix = "";
3045 sw = "switch";
3046 }
3047 else
3048 {
3049 prefix = "option(\"";
3050 suffix = "\")";
3051 sw = "attribute";
3052 }
3053
3054 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3055 SUBTARGET_OVERRIDE_OPTIONS;
3056 #endif
3057
3058 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3059 SUBSUBTARGET_OVERRIDE_OPTIONS;
3060 #endif
3061
3062 if (TARGET_X32)
3063 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3064
3065 /* -fPIC is the default for x86_64. */
3066 if (TARGET_MACHO && TARGET_64BIT)
3067 flag_pic = 2;
3068
3069 /* Need to check -mtune=generic first. */
3070 if (ix86_tune_string)
3071 {
3072 if (!strcmp (ix86_tune_string, "generic")
3073 || !strcmp (ix86_tune_string, "i686")
3074 /* As special support for cross compilers we read -mtune=native
3075 as -mtune=generic. With native compilers we won't see the
3076 -mtune=native, as it was changed by the driver. */
3077 || !strcmp (ix86_tune_string, "native"))
3078 {
3079 if (TARGET_64BIT)
3080 ix86_tune_string = "generic64";
3081 else
3082 ix86_tune_string = "generic32";
3083 }
3084 /* If this call is for setting the option attribute, allow the
3085 generic32/generic64 that was previously set. */
3086 else if (!main_args_p
3087 && (!strcmp (ix86_tune_string, "generic32")
3088 || !strcmp (ix86_tune_string, "generic64")))
3089 ;
3090 else if (!strncmp (ix86_tune_string, "generic", 7))
3091 error ("bad value (%s) for %stune=%s %s",
3092 ix86_tune_string, prefix, suffix, sw);
3093 else if (!strcmp (ix86_tune_string, "x86-64"))
3094 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3095 "%stune=k8%s or %stune=generic%s instead as appropriate",
3096 prefix, suffix, prefix, suffix, prefix, suffix);
3097 }
3098 else
3099 {
3100 if (ix86_arch_string)
3101 ix86_tune_string = ix86_arch_string;
3102 if (!ix86_tune_string)
3103 {
3104 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3105 ix86_tune_defaulted = 1;
3106 }
3107
3108 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3109 need to use a sensible tune option. */
3110 if (!strcmp (ix86_tune_string, "generic")
3111 || !strcmp (ix86_tune_string, "x86-64")
3112 || !strcmp (ix86_tune_string, "i686"))
3113 {
3114 if (TARGET_64BIT)
3115 ix86_tune_string = "generic64";
3116 else
3117 ix86_tune_string = "generic32";
3118 }
3119 }
3120
3121 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3122 {
3123 /* rep; movq isn't available in 32-bit code. */
3124 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3125 ix86_stringop_alg = no_stringop;
3126 }
3127
3128 if (!ix86_arch_string)
3129 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3130 else
3131 ix86_arch_specified = 1;
3132
3133 if (!global_options_set.x_ix86_abi)
3134 ix86_abi = DEFAULT_ABI;
3135
3136 if (global_options_set.x_ix86_cmodel)
3137 {
3138 switch (ix86_cmodel)
3139 {
3140 case CM_SMALL:
3141 case CM_SMALL_PIC:
3142 if (flag_pic)
3143 ix86_cmodel = CM_SMALL_PIC;
3144 if (!TARGET_64BIT)
3145 error ("code model %qs not supported in the %s bit mode",
3146 "small", "32");
3147 break;
3148
3149 case CM_MEDIUM:
3150 case CM_MEDIUM_PIC:
3151 if (flag_pic)
3152 ix86_cmodel = CM_MEDIUM_PIC;
3153 if (!TARGET_64BIT)
3154 error ("code model %qs not supported in the %s bit mode",
3155 "medium", "32");
3156 else if (TARGET_X32)
3157 error ("code model %qs not supported in x32 mode",
3158 "medium");
3159 break;
3160
3161 case CM_LARGE:
3162 case CM_LARGE_PIC:
3163 if (flag_pic)
3164 ix86_cmodel = CM_LARGE_PIC;
3165 if (!TARGET_64BIT)
3166 error ("code model %qs not supported in the %s bit mode",
3167 "large", "32");
3168 else if (TARGET_X32)
3169 error ("code model %qs not supported in x32 mode",
3170 "large");
3171 break;
3172
3173 case CM_32:
3174 if (flag_pic)
3175 error ("code model %qs does not support PIC mode", "32");
3176 if (TARGET_64BIT)
3177 error ("code model %qs not supported in the %s bit mode",
3178 "32", "64");
3179 break;
3180
3181 case CM_KERNEL:
3182 if (flag_pic)
3183 {
3184 error ("code model %qs does not support PIC mode", "kernel");
3185 ix86_cmodel = CM_32;
3186 }
3187 if (!TARGET_64BIT)
3188 error ("code model %qs not supported in the %s bit mode",
3189 "kernel", "32");
3190 break;
3191
3192 default:
3193 gcc_unreachable ();
3194 }
3195 }
3196 else
3197 {
3198 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3199 use of rip-relative addressing. This eliminates fixups that
3200 would otherwise be needed if this object is to be placed in a
3201 DLL, and is essentially just as efficient as direct addressing. */
3202 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3203 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3204 else if (TARGET_64BIT)
3205 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3206 else
3207 ix86_cmodel = CM_32;
3208 }
3209 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3210 {
3211 error ("-masm=intel not supported in this configuration");
3212 ix86_asm_dialect = ASM_ATT;
3213 }
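/* Reject a request for a word size that was not compiled into this
   compiler. */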
3214 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3215 sorry ("%i-bit mode not compiled in",
3216 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3217
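/* Look up -march in the processor alias table and enable every ISA the
   selected CPU provides, unless the user overrode that ISA explicitly. */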
3218 for (i = 0; i < pta_size; i++)
3219 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3220 {
3221 ix86_schedule = processor_alias_table[i].schedule;
3222 ix86_arch = processor_alias_table[i].processor;
3223 /* Default cpu tuning to the architecture. */
3224 ix86_tune = ix86_arch;
3225
3226 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3227 error ("CPU you selected does not support x86-64 "
3228 "instruction set");
3229
3230 if (processor_alias_table[i].flags & PTA_MMX
3231 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3232 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3233 if (processor_alias_table[i].flags & PTA_3DNOW
3234 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3235 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3236 if (processor_alias_table[i].flags & PTA_3DNOW_A
3237 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3238 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3239 if (processor_alias_table[i].flags & PTA_SSE
3240 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3241 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3242 if (processor_alias_table[i].flags & PTA_SSE2
3243 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3244 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3245 if (processor_alias_table[i].flags & PTA_SSE3
3246 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3247 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3248 if (processor_alias_table[i].flags & PTA_SSSE3
3249 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3250 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3251 if (processor_alias_table[i].flags & PTA_SSE4_1
3252 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3253 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3254 if (processor_alias_table[i].flags & PTA_SSE4_2
3255 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3256 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3257 if (processor_alias_table[i].flags & PTA_AVX
3258 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3259 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3260 if (processor_alias_table[i].flags & PTA_FMA
3261 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3262 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3263 if (processor_alias_table[i].flags & PTA_SSE4A
3264 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3265 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3266 if (processor_alias_table[i].flags & PTA_FMA4
3267 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3268 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3269 if (processor_alias_table[i].flags & PTA_XOP
3270 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3271 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3272 if (processor_alias_table[i].flags & PTA_LWP
3273 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3274 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3275 if (processor_alias_table[i].flags & PTA_ABM
3276 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3277 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3278 if (processor_alias_table[i].flags & PTA_BMI
3279 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3280 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3281 if (processor_alias_table[i].flags & PTA_TBM
3282 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3283 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3284 if (processor_alias_table[i].flags & PTA_CX16
3285 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3286 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3287 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3288 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3289 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3290 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3291 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3292 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3293 if (processor_alias_table[i].flags & PTA_MOVBE
3294 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3295 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3296 if (processor_alias_table[i].flags & PTA_AES
3297 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3298 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3299 if (processor_alias_table[i].flags & PTA_PCLMUL
3300 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3301 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3302 if (processor_alias_table[i].flags & PTA_FSGSBASE
3303 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3304 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3305 if (processor_alias_table[i].flags & PTA_RDRND
3306 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3307 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3308 if (processor_alias_table[i].flags & PTA_F16C
3309 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3310 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3311 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3312 x86_prefetch_sse = true;
3313
3314 break;
3315 }
3316
3317 if (!strcmp (ix86_arch_string, "generic"))
3318 error ("generic CPU can be used only for %stune=%s %s",
3319 prefix, suffix, sw);
3320 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3321 error ("bad value (%s) for %sarch=%s %s",
3322 ix86_arch_string, prefix, suffix, sw);
3323
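/* Precompute the per-architecture feature tests for the selected -march. */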
3324 ix86_arch_mask = 1u << ix86_arch;
3325 for (i = 0; i < X86_ARCH_LAST; ++i)
3326 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3327
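/* Now look up -mtune in the alias table.  Unlike -march this only affects
   instruction scheduling and tuning, not which ISAs are enabled. */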
3328 for (i = 0; i < pta_size; i++)
3329 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3330 {
3331 ix86_schedule = processor_alias_table[i].schedule;
3332 ix86_tune = processor_alias_table[i].processor;
3333 if (TARGET_64BIT)
3334 {
3335 if (!(processor_alias_table[i].flags & PTA_64BIT))
3336 {
3337 if (ix86_tune_defaulted)
3338 {
3339 ix86_tune_string = "x86-64";
3340 for (i = 0; i < pta_size; i++)
3341 if (! strcmp (ix86_tune_string,
3342 processor_alias_table[i].name))
3343 break;
3344 ix86_schedule = processor_alias_table[i].schedule;
3345 ix86_tune = processor_alias_table[i].processor;
3346 }
3347 else
3348 error ("CPU you selected does not support x86-64 "
3349 "instruction set");
3350 }
3351 }
3352 else
3353 {
3354 /* Adjust tuning when compiling for 32-bit ABI. */
3355 switch (ix86_tune)
3356 {
3357 case PROCESSOR_GENERIC64:
3358 ix86_tune = PROCESSOR_GENERIC32;
3359 ix86_schedule = CPU_PENTIUMPRO;
3360 break;
3361
3362 case PROCESSOR_CORE2_64:
3363 ix86_tune = PROCESSOR_CORE2_32;
3364 break;
3365
3366 case PROCESSOR_COREI7_64:
3367 ix86_tune = PROCESSOR_COREI7_32;
3368 break;
3369
3370 default:
3371 break;
3372 }
3373 }
3374 /* Intel CPUs have always interpreted SSE prefetch instructions as
3375 NOPs; so, we can enable SSE prefetch instructions even when
3376 -mtune (rather than -march) points us to a processor that has them.
3377 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3378 higher processors. */
3379 if (TARGET_CMOVE
3380 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3381 x86_prefetch_sse = true;
3382 break;
3383 }
3384
3385 if (ix86_tune_specified && i == pta_size)
3386 error ("bad value (%s) for %stune=%s %s",
3387 ix86_tune_string, prefix, suffix, sw);
3388
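/* Precompute the per-CPU tuning feature tests for the selected -mtune. */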
3389 ix86_tune_mask = 1u << ix86_tune;
3390 for (i = 0; i < X86_TUNE_LAST; ++i)
3391 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3392
3393 #ifndef USE_IX86_FRAME_POINTER
3394 #define USE_IX86_FRAME_POINTER 0
3395 #endif
3396
3397 #ifndef USE_X86_64_FRAME_POINTER
3398 #define USE_X86_64_FRAME_POINTER 0
3399 #endif
3400
3401 /* Set the default values for switches whose default depends on TARGET_64BIT
3402 in case they weren't overwritten by command line options. */
3403 if (TARGET_64BIT)
3404 {
3405 if (optimize > 1 && !global_options_set.x_flag_zee)
3406 flag_zee = 1;
3407 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3408 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3409 if (flag_asynchronous_unwind_tables == 2)
3410 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3411 if (flag_pcc_struct_return == 2)
3412 flag_pcc_struct_return = 0;
3413 }
3414 else
3415 {
3416 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3417 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3418 if (flag_asynchronous_unwind_tables == 2)
3419 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3420 if (flag_pcc_struct_return == 2)
3421 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3422 }
3423
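/* Pick the cost table: size costs under -Os, otherwise the costs of the
   selected tuning. */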
3424 if (optimize_size)
3425 ix86_cost = &ix86_size_cost;
3426 else
3427 ix86_cost = processor_target_table[ix86_tune].cost;
3428
3429 /* Arrange to set up i386_stack_locals for all functions. */
3430 init_machine_status = ix86_init_machine_status;
3431
3432 /* Validate -mregparm= value. */
3433 if (global_options_set.x_ix86_regparm)
3434 {
3435 if (TARGET_64BIT)
3436 warning (0, "-mregparm is ignored in 64-bit mode");
3437 if (ix86_regparm > REGPARM_MAX)
3438 {
3439 error ("-mregparm=%d is not between 0 and %d",
3440 ix86_regparm, REGPARM_MAX);
3441 ix86_regparm = 0;
3442 }
3443 }
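/* The 64-bit ABI always passes integer arguments in registers, so use the
   maximum. */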
3444 if (TARGET_64BIT)
3445 ix86_regparm = REGPARM_MAX;
3446
3447 /* Default align_* from the processor table. */
3448 if (align_loops == 0)
3449 {
3450 align_loops = processor_target_table[ix86_tune].align_loop;
3451 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3452 }
3453 if (align_jumps == 0)
3454 {
3455 align_jumps = processor_target_table[ix86_tune].align_jump;
3456 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3457 }
3458 if (align_functions == 0)
3459 {
3460 align_functions = processor_target_table[ix86_tune].align_func;
3461 }
3462
3463 /* Provide default for -mbranch-cost= value. */
3464 if (!global_options_set.x_ix86_branch_cost)
3465 ix86_branch_cost = ix86_cost->branch_cost;
3466
3467 if (TARGET_64BIT)
3468 {
3469 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3470
3471 /* Enable by default the SSE and MMX builtins. Do allow the user to
3472 explicitly disable any of these. In particular, disabling SSE and
3473 MMX for kernel code is extremely useful. */
3474 if (!ix86_arch_specified)
3475 ix86_isa_flags
3476 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3477 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3478
3479 if (TARGET_RTD)
3480 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3481 }
3482 else
3483 {
3484 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3485
3486 if (!ix86_arch_specified)
3487 ix86_isa_flags
3488 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3489
3490 /* The i386 ABI does not specify a red zone. It can still make sense when
3491 the programmer keeps the stack below the stack pointer from being clobbered. */
3492 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3493 target_flags |= MASK_NO_RED_ZONE;
3494 }
3495
3496 /* Keep nonleaf frame pointers. */
3497 if (flag_omit_frame_pointer)
3498 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3499 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3500 flag_omit_frame_pointer = 1;
3501
3502 /* If we're doing fast math, we don't care about comparison order
3503 wrt NaNs. This lets us use a shorter comparison sequence. */
3504 if (flag_finite_math_only)
3505 target_flags &= ~MASK_IEEE_FP;
3506
3507 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3508 since the insns won't need emulation. */
3509 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3510 target_flags &= ~MASK_NO_FANCY_MATH_387;
3511
3512 /* Likewise, if the target doesn't have a 387, or we've specified
3513 software floating point, don't use 387 inline intrinsics. */
3514 if (!TARGET_80387)
3515 target_flags |= MASK_NO_FANCY_MATH_387;
3516
3517 /* Turn on MMX builtins for -msse. */
3518 if (TARGET_SSE)
3519 {
3520 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3521 x86_prefetch_sse = true;
3522 }
3523
3524 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3525 if (TARGET_SSE4_2 || TARGET_ABM)
3526 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3527
3528 /* Validate -mpreferred-stack-boundary= value or default it to
3529 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3530 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3531 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3532 {
3533 int min = (TARGET_64BIT ? 4 : 2);
3534 int max = (TARGET_SEH ? 4 : 12);
3535
3536 if (ix86_preferred_stack_boundary_arg < min
3537 || ix86_preferred_stack_boundary_arg > max)
3538 {
3539 if (min == max)
3540 error ("-mpreferred-stack-boundary is not supported "
3541 "for this target");
3542 else
3543 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3544 ix86_preferred_stack_boundary_arg, min, max);
3545 }
3546 else
3547 ix86_preferred_stack_boundary
3548 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3549 }
3550
3551 /* Set the default value for -mstackrealign. */
3552 if (ix86_force_align_arg_pointer == -1)
3553 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3554
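/* Assume the incoming stack is aligned to the preferred boundary unless
   -mincoming-stack-boundary says otherwise below. */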
3555 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3556
3557 /* Validate -mincoming-stack-boundary= value or default it to
3558 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3559 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3560 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3561 {
3562 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3563 || ix86_incoming_stack_boundary_arg > 12)
3564 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3565 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3566 else
3567 {
3568 ix86_user_incoming_stack_boundary
3569 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3570 ix86_incoming_stack_boundary
3571 = ix86_user_incoming_stack_boundary;
3572 }
3573 }
3574
3575 /* Accept -msseregparm only if at least SSE support is enabled. */
3576 if (TARGET_SSEREGPARM
3577 && ! TARGET_SSE)
3578 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3579
3580 if (global_options_set.x_ix86_fpmath)
3581 {
3582 if (ix86_fpmath & FPMATH_SSE)
3583 {
3584 if (!TARGET_SSE)
3585 {
3586 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3587 ix86_fpmath = FPMATH_387;
3588 }
3589 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3590 {
3591 warning (0, "387 instruction set disabled, using SSE arithmetics");
3592 ix86_fpmath = FPMATH_SSE;
3593 }
3594 }
3595 }
3596 else
3597 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3598
3599 /* If the i387 is disabled, then do not return values in it. */
3600 if (!TARGET_80387)
3601 target_flags &= ~MASK_FLOAT_RETURNS;
3602
3603 /* Use external vectorized library in vectorizing intrinsics. */
3604 if (global_options_set.x_ix86_veclibabi_type)
3605 switch (ix86_veclibabi_type)
3606 {
3607 case ix86_veclibabi_type_svml:
3608 ix86_veclib_handler = ix86_veclibabi_svml;
3609 break;
3610
3611 case ix86_veclibabi_type_acml:
3612 ix86_veclib_handler = ix86_veclibabi_acml;
3613 break;
3614
3615 default:
3616 gcc_unreachable ();
3617 }
3618
3619 if ((!USE_IX86_FRAME_POINTER
3620 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3621 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3622 && !optimize_size)
3623 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3624
3625 /* ??? Unwind info is not correct around the CFG unless either a frame
3626 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3627 unwind info generation to be aware of the CFG and propagating states
3628 around edges. */
3629 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3630 || flag_exceptions || flag_non_call_exceptions)
3631 && flag_omit_frame_pointer
3632 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3633 {
3634 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3635 warning (0, "unwind tables currently require either a frame pointer "
3636 "or %saccumulate-outgoing-args%s for correctness",
3637 prefix, suffix);
3638 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3639 }
3640
3641 /* If stack probes are required, the space used for large function
3642 arguments on the stack must also be probed, so enable
3643 -maccumulate-outgoing-args so this happens in the prologue. */
3644 if (TARGET_STACK_PROBE
3645 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3646 {
3647 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3648 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3649 "for correctness", prefix, suffix);
3650 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3651 }
3652
3653 /* For sane SSE instruction set generation we need the fcomi instruction.
3654 It is safe to enable all CMOV instructions. Also, the RDRAND intrinsic
3655 expands to a sequence that includes a conditional move. */
3656 if (TARGET_SSE || TARGET_RDRND)
3657 TARGET_CMOVE = 1;
3658
3659 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3660 {
3661 char *p;
3662 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3663 p = strchr (internal_label_prefix, 'X');
3664 internal_label_prefix_len = p - internal_label_prefix;
3665 *p = '\0';
3666 }
3667
3668 /* When no scheduling description is available, disable the scheduler passes
3669 so they won't slow down the compilation and make x87 code slower. */
3670 if (!TARGET_SCHEDULE)
3671 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3672
3673 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3674 ix86_cost->simultaneous_prefetches,
3675 global_options.x_param_values,
3676 global_options_set.x_param_values);
3677 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3678 global_options.x_param_values,
3679 global_options_set.x_param_values);
3680 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3681 global_options.x_param_values,
3682 global_options_set.x_param_values);
3683 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3684 global_options.x_param_values,
3685 global_options_set.x_param_values);
3686
3687 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3688 if (flag_prefetch_loop_arrays < 0
3689 && HAVE_prefetch
3690 && optimize >= 3
3691 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3692 flag_prefetch_loop_arrays = 1;
3693
3694 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3695 can be optimized to ap = __builtin_next_arg (0). */
3696 if (!TARGET_64BIT && !flag_split_stack)
3697 targetm.expand_builtin_va_start = NULL;
3698
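/* Select the word-size specific instruction generators used later by the
   prologue/epilogue code and builtin expanders. */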
3699 if (TARGET_64BIT)
3700 {
3701 ix86_gen_leave = gen_leave_rex64;
3702 ix86_gen_add3 = gen_adddi3;
3703 ix86_gen_sub3 = gen_subdi3;
3704 ix86_gen_sub3_carry = gen_subdi3_carry;
3705 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3706 ix86_gen_monitor = gen_sse3_monitor64;
3707 ix86_gen_andsp = gen_anddi3;
3708 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3709 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3710 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3711 }
3712 else
3713 {
3714 ix86_gen_leave = gen_leave;
3715 ix86_gen_add3 = gen_addsi3;
3716 ix86_gen_sub3 = gen_subsi3;
3717 ix86_gen_sub3_carry = gen_subsi3_carry;
3718 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3719 ix86_gen_monitor = gen_sse3_monitor;
3720 ix86_gen_andsp = gen_andsi3;
3721 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3722 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3723 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3724 }
3725
3726 #ifdef USE_IX86_CLD
3727 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3728 if (!TARGET_64BIT)
3729 target_flags |= MASK_CLD & ~target_flags_explicit;
3730 #endif
3731
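/* Resolve -mfentry: it is unsupported for 32-bit PIC, mandatory with SEH,
   and otherwise defaults according to PROFILE_BEFORE_PROLOGUE. */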
3732 if (!TARGET_64BIT && flag_pic)
3733 {
3734 if (flag_fentry > 0)
3735 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3736 "with -fpic");
3737 flag_fentry = 0;
3738 }
3739 else if (TARGET_SEH)
3740 {
3741 if (flag_fentry == 0)
3742 sorry ("-mno-fentry isn%'t compatible with SEH");
3743 flag_fentry = 1;
3744 }
3745 else if (flag_fentry < 0)
3746 {
3747 #if defined(PROFILE_BEFORE_PROLOGUE)
3748 flag_fentry = 1;
3749 #else
3750 flag_fentry = 0;
3751 #endif
3752 }
3753
3754 if (TARGET_AVX)
3755 {
3756 /* When not optimizing for size, enable the vzeroupper optimization for
3757 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3758 AVX unaligned loads/stores. */
3759 if (!optimize_size)
3760 {
3761 if (flag_expensive_optimizations
3762 && !(target_flags_explicit & MASK_VZEROUPPER))
3763 target_flags |= MASK_VZEROUPPER;
3764 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3765 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3766 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3767 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3768 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3769 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3770 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3771 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3772 target_flags |= MASK_PREFER_AVX128;
3773 }
3774 }
3775 else
3776 {
3777 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3778 target_flags &= ~MASK_VZEROUPPER;
3779 }
3780
3781 /* Save the initial options in case the user does function specific
3782 options. */
3783 if (main_args_p)
3784 target_option_default_node = target_option_current_node
3785 = build_target_option_node ();
3786 }
3787
3788 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
3789
3790 static bool
3791 function_pass_avx256_p (const_rtx val)
3792 {
3793 if (!val)
3794 return false;
3795
3796 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3797 return true;
3798
3799 if (GET_CODE (val) == PARALLEL)
3800 {
3801 int i;
3802 rtx r;
3803
3804 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3805 {
3806 r = XVECEXP (val, 0, i);
3807 if (GET_CODE (r) == EXPR_LIST
3808 && XEXP (r, 0)
3809 && REG_P (XEXP (r, 0))
3810 && (GET_MODE (XEXP (r, 0)) == OImode
3811 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3812 return true;
3813 }
3814 }
3815
3816 return false;
3817 }
3818
3819 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3820
3821 static void
3822 ix86_option_override (void)
3823 {
3824 ix86_option_override_internal (true);
3825 }
3826
3827 /* Update register usage after having seen the compiler flags. */
3828
3829 static void
3830 ix86_conditional_register_usage (void)
3831 {
3832 int i;
3833 unsigned int j;
3834
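/* Entries of 2 or 3 in the register tables mean the register is fixed or
   call-used only for 32-bit or only for 64-bit respectively; collapse them
   to plain 0/1 for the current mode. */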
3835 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3836 {
3837 if (fixed_regs[i] > 1)
3838 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3839 if (call_used_regs[i] > 1)
3840 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3841 }
3842
3843 /* The PIC register, if it exists, is fixed. */
3844 j = PIC_OFFSET_TABLE_REGNUM;
3845 if (j != INVALID_REGNUM)
3846 fixed_regs[j] = call_used_regs[j] = 1;
3847
3848 /* The 64-bit MS_ABI changes the set of call-used registers. */
3849 if (TARGET_64BIT_MS_ABI)
3850 {
3851 call_used_regs[SI_REG] = 0;
3852 call_used_regs[DI_REG] = 0;
3853 call_used_regs[XMM6_REG] = 0;
3854 call_used_regs[XMM7_REG] = 0;
3855 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3856 call_used_regs[i] = 0;
3857 }
3858
3859 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3860 other call-clobbered regs for 64-bit. */
3861 if (TARGET_64BIT)
3862 {
3863 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3864
3865 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3866 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3867 && call_used_regs[i])
3868 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3869 }
3870
3871 /* If MMX is disabled, squash the registers. */
3872 if (! TARGET_MMX)
3873 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3874 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3875 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3876
3877 /* If SSE is disabled, squash the registers. */
3878 if (! TARGET_SSE)
3879 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3880 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3881 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3882
3883 /* If the FPU is disabled, squash the registers. */
3884 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3885 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3886 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3887 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3888
3889 /* If 32-bit, squash the 64-bit registers. */
3890 if (! TARGET_64BIT)
3891 {
3892 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3893 reg_names[i] = "";
3894 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3895 reg_names[i] = "";
3896 }
3897 }
3898
3899 \f
3900 /* Save the current options */
3901
3902 static void
3903 ix86_function_specific_save (struct cl_target_option *ptr)
3904 {
3905 ptr->arch = ix86_arch;
3906 ptr->schedule = ix86_schedule;
3907 ptr->tune = ix86_tune;
3908 ptr->branch_cost = ix86_branch_cost;
3909 ptr->tune_defaulted = ix86_tune_defaulted;
3910 ptr->arch_specified = ix86_arch_specified;
3911 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3912 ptr->ix86_target_flags_explicit = target_flags_explicit;
3913
3914 /* The fields are char but the variables are not; make sure the
3915 values fit in the fields. */
3916 gcc_assert (ptr->arch == ix86_arch);
3917 gcc_assert (ptr->schedule == ix86_schedule);
3918 gcc_assert (ptr->tune == ix86_tune);
3919 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3920 }
3921
3922 /* Restore the current options */
3923
3924 static void
3925 ix86_function_specific_restore (struct cl_target_option *ptr)
3926 {
3927 enum processor_type old_tune = ix86_tune;
3928 enum processor_type old_arch = ix86_arch;
3929 unsigned int ix86_arch_mask, ix86_tune_mask;
3930 int i;
3931
3932 ix86_arch = (enum processor_type) ptr->arch;
3933 ix86_schedule = (enum attr_cpu) ptr->schedule;
3934 ix86_tune = (enum processor_type) ptr->tune;
3935 ix86_branch_cost = ptr->branch_cost;
3936 ix86_tune_defaulted = ptr->tune_defaulted;
3937 ix86_arch_specified = ptr->arch_specified;
3938 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
3939 target_flags_explicit = ptr->ix86_target_flags_explicit;
3940
3941 /* Recreate the arch feature tests if the arch changed */
3942 if (old_arch != ix86_arch)
3943 {
3944 ix86_arch_mask = 1u << ix86_arch;
3945 for (i = 0; i < X86_ARCH_LAST; ++i)
3946 ix86_arch_features[i]
3947 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3948 }
3949
3950 /* Recreate the tune optimization tests */
3951 if (old_tune != ix86_tune)
3952 {
3953 ix86_tune_mask = 1u << ix86_tune;
3954 for (i = 0; i < X86_TUNE_LAST; ++i)
3955 ix86_tune_features[i]
3956 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3957 }
3958 }
3959
3960 /* Print the current options */
3961
3962 static void
3963 ix86_function_specific_print (FILE *file, int indent,
3964 struct cl_target_option *ptr)
3965 {
3966 char *target_string
3967 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3968 NULL, NULL, ptr->x_ix86_fpmath, false);
3969
3970 fprintf (file, "%*sarch = %d (%s)\n",
3971 indent, "",
3972 ptr->arch,
3973 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3974 ? cpu_names[ptr->arch]
3975 : "<unknown>"));
3976
3977 fprintf (file, "%*stune = %d (%s)\n",
3978 indent, "",
3979 ptr->tune,
3980 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3981 ? cpu_names[ptr->tune]
3982 : "<unknown>"));
3983
3984 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3985
3986 if (target_string)
3987 {
3988 fprintf (file, "%*s%s\n", indent, "", target_string);
3989 free (target_string);
3990 }
3991 }
3992
3993 \f
3994 /* Inner function to process attribute((target(...))): take an argument and
3995 set the current options from it. If we have a list, recursively go
3996 over the list. */
3997
3998 static bool
3999 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4000 struct gcc_options *enum_opts_set)
4001 {
4002 char *next_optstr;
4003 bool ret = true;
4004
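/* Helper macros for the table below, which maps each recognized target
   attribute name to its kind, option index, and target_flags mask. */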
4005 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4006 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4007 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4008 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4009 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4010
4011 enum ix86_opt_type
4012 {
4013 ix86_opt_unknown,
4014 ix86_opt_yes,
4015 ix86_opt_no,
4016 ix86_opt_str,
4017 ix86_opt_enum,
4018 ix86_opt_isa
4019 };
4020
4021 static const struct
4022 {
4023 const char *string;
4024 size_t len;
4025 enum ix86_opt_type type;
4026 int opt;
4027 int mask;
4028 } attrs[] = {
4029 /* isa options */
4030 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4031 IX86_ATTR_ISA ("abm", OPT_mabm),
4032 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4033 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4034 IX86_ATTR_ISA ("aes", OPT_maes),
4035 IX86_ATTR_ISA ("avx", OPT_mavx),
4036 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4037 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4038 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4039 IX86_ATTR_ISA ("sse", OPT_msse),
4040 IX86_ATTR_ISA ("sse2", OPT_msse2),
4041 IX86_ATTR_ISA ("sse3", OPT_msse3),
4042 IX86_ATTR_ISA ("sse4", OPT_msse4),
4043 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4044 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4045 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4046 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4047 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4048 IX86_ATTR_ISA ("xop", OPT_mxop),
4049 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4050 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4051 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4052 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4053
4054 /* enum options */
4055 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4056
4057 /* string options */
4058 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4059 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4060
4061 /* flag options */
4062 IX86_ATTR_YES ("cld",
4063 OPT_mcld,
4064 MASK_CLD),
4065
4066 IX86_ATTR_NO ("fancy-math-387",
4067 OPT_mfancy_math_387,
4068 MASK_NO_FANCY_MATH_387),
4069
4070 IX86_ATTR_YES ("ieee-fp",
4071 OPT_mieee_fp,
4072 MASK_IEEE_FP),
4073
4074 IX86_ATTR_YES ("inline-all-stringops",
4075 OPT_minline_all_stringops,
4076 MASK_INLINE_ALL_STRINGOPS),
4077
4078 IX86_ATTR_YES ("inline-stringops-dynamically",
4079 OPT_minline_stringops_dynamically,
4080 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4081
4082 IX86_ATTR_NO ("align-stringops",
4083 OPT_mno_align_stringops,
4084 MASK_NO_ALIGN_STRINGOPS),
4085
4086 IX86_ATTR_YES ("recip",
4087 OPT_mrecip,
4088 MASK_RECIP),
4089
4090 };
4091
4092 /* If this is a list, recurse to get the options. */
4093 if (TREE_CODE (args) == TREE_LIST)
4094 {
4095 bool ret = true;
4096
4097 for (; args; args = TREE_CHAIN (args))
4098 if (TREE_VALUE (args)
4099 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4100 p_strings, enum_opts_set))
4101 ret = false;
4102
4103 return ret;
4104 }
4105
4106 else if (TREE_CODE (args) != STRING_CST)
4107 gcc_unreachable ();
4108
4109 /* Handle multiple arguments separated by commas. */
4110 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4111
4112 while (next_optstr && *next_optstr != '\0')
4113 {
4114 char *p = next_optstr;
4115 char *orig_p = p;
4116 char *comma = strchr (next_optstr, ',');
4117 const char *opt_string;
4118 size_t len, opt_len;
4119 int opt;
4120 bool opt_set_p;
4121 char ch;
4122 unsigned i;
4123 enum ix86_opt_type type = ix86_opt_unknown;
4124 int mask = 0;
4125
4126 if (comma)
4127 {
4128 *comma = '\0';
4129 len = comma - next_optstr;
4130 next_optstr = comma + 1;
4131 }
4132 else
4133 {
4134 len = strlen (p);
4135 next_optstr = NULL;
4136 }
4137
4138 /* Recognize no-xxx. */
4139 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4140 {
4141 opt_set_p = false;
4142 p += 3;
4143 len -= 3;
4144 }
4145 else
4146 opt_set_p = true;
4147
4148 /* Find the option. */
4149 ch = *p;
4150 opt = N_OPTS;
4151 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4152 {
4153 type = attrs[i].type;
4154 opt_len = attrs[i].len;
4155 if (ch == attrs[i].string[0]
4156 && ((type != ix86_opt_str && type != ix86_opt_enum)
4157 ? len == opt_len
4158 : len > opt_len)
4159 && memcmp (p, attrs[i].string, opt_len) == 0)
4160 {
4161 opt = attrs[i].opt;
4162 mask = attrs[i].mask;
4163 opt_string = attrs[i].string;
4164 break;
4165 }
4166 }
4167
4168 /* Process the option. */
4169 if (opt == N_OPTS)
4170 {
4171 error ("attribute(target(\"%s\")) is unknown", orig_p);
4172 ret = false;
4173 }
4174
4175 else if (type == ix86_opt_isa)
4176 {
4177 struct cl_decoded_option decoded;
4178
4179 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4180 ix86_handle_option (&global_options, &global_options_set,
4181 &decoded, input_location);
4182 }
4183
4184 else if (type == ix86_opt_yes || type == ix86_opt_no)
4185 {
4186 if (type == ix86_opt_no)
4187 opt_set_p = !opt_set_p;
4188
4189 if (opt_set_p)
4190 target_flags |= mask;
4191 else
4192 target_flags &= ~mask;
4193 }
4194
4195 else if (type == ix86_opt_str)
4196 {
4197 if (p_strings[opt])
4198 {
4199 error ("option(\"%s\") was already specified", opt_string);
4200 ret = false;
4201 }
4202 else
4203 p_strings[opt] = xstrdup (p + opt_len);
4204 }
4205
4206 else if (type == ix86_opt_enum)
4207 {
4208 bool arg_ok;
4209 int value;
4210
4211 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4212 if (arg_ok)
4213 set_option (&global_options, enum_opts_set, opt, value,
4214 p + opt_len, DK_UNSPECIFIED, input_location,
4215 global_dc);
4216 else
4217 {
4218 error ("attribute(target(\"%s\")) is unknown", orig_p);
4219 ret = false;
4220 }
4221 }
4222
4223 else
4224 gcc_unreachable ();
4225 }
4226
4227 return ret;
4228 }
4229
4230 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4231
4232 tree
4233 ix86_valid_target_attribute_tree (tree args)
4234 {
4235 const char *orig_arch_string = ix86_arch_string;
4236 const char *orig_tune_string = ix86_tune_string;
4237 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4238 int orig_tune_defaulted = ix86_tune_defaulted;
4239 int orig_arch_specified = ix86_arch_specified;
4240 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4241 tree t = NULL_TREE;
4242 int i;
4243 struct cl_target_option *def
4244 = TREE_TARGET_OPTION (target_option_default_node);
4245 struct gcc_options enum_opts_set;
4246
4247 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4248
4249 /* Process each of the options on the chain. */
4250 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4251 &enum_opts_set))
4252 return NULL_TREE;
4253
4254 /* If the changed options are different from the default, rerun
4255 ix86_option_override_internal, and then save the options away.
4256 The string options are attribute options, and will be undone
4257 when we copy the save structure. */
4258 if (ix86_isa_flags != def->x_ix86_isa_flags
4259 || target_flags != def->x_target_flags
4260 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4261 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4262 || enum_opts_set.x_ix86_fpmath)
4263 {
4264 /* If we are using the default tune= or arch=, undo the string assigned,
4265 and use the default. */
4266 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4267 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4268 else if (!orig_arch_specified)
4269 ix86_arch_string = NULL;
4270
4271 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4272 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4273 else if (orig_tune_defaulted)
4274 ix86_tune_string = NULL;
4275
4276 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4277 if (enum_opts_set.x_ix86_fpmath)
4278 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4279 else if (!TARGET_64BIT && TARGET_SSE)
4280 {
4281 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4282 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4283 }
4284
4285 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4286 ix86_option_override_internal (false);
4287
4288 /* Add any builtin functions with the new isa if any. */
4289 ix86_add_new_builtins (ix86_isa_flags);
4290
4291 /* Save the current options unless we are validating options for
4292 #pragma. */
4293 t = build_target_option_node ();
4294
4295 ix86_arch_string = orig_arch_string;
4296 ix86_tune_string = orig_tune_string;
4297 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4298
4299 /* Free up memory allocated to hold the strings */
4300 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4301 free (option_strings[i]);
4302 }
4303
4304 return t;
4305 }
4306
4307 /* Hook to validate attribute((target("string"))). */
4308
4309 static bool
4310 ix86_valid_target_attribute_p (tree fndecl,
4311 tree ARG_UNUSED (name),
4312 tree args,
4313 int ARG_UNUSED (flags))
4314 {
4315 struct cl_target_option cur_target;
4316 bool ret = true;
4317 tree old_optimize = build_optimization_node ();
4318 tree new_target, new_optimize;
4319 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4320
4321 /* If the function changed the optimization levels as well as setting target
4322 options, start with the optimizations specified. */
4323 if (func_optimize && func_optimize != old_optimize)
4324 cl_optimization_restore (&global_options,
4325 TREE_OPTIMIZATION (func_optimize));
4326
4327 /* The target attributes may also change some optimization flags, so update
4328 the optimization options if necessary. */
4329 cl_target_option_save (&cur_target, &global_options);
4330 new_target = ix86_valid_target_attribute_tree (args);
4331 new_optimize = build_optimization_node ();
4332
4333 if (!new_target)
4334 ret = false;
4335
4336 else if (fndecl)
4337 {
4338 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4339
4340 if (old_optimize != new_optimize)
4341 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4342 }
4343
4344 cl_target_option_restore (&global_options, &cur_target);
4345
4346 if (old_optimize != new_optimize)
4347 cl_optimization_restore (&global_options,
4348 TREE_OPTIMIZATION (old_optimize));
4349
4350 return ret;
4351 }
4352
4353 \f
4354 /* Hook to determine if one function can safely inline another. */
4355
4356 static bool
4357 ix86_can_inline_p (tree caller, tree callee)
4358 {
4359 bool ret = false;
4360 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4361 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4362
4363 /* If callee has no option attributes, then it is ok to inline. */
4364 if (!callee_tree)
4365 ret = true;
4366
4367 /* If caller has no option attributes, but callee does then it is not ok to
4368 inline. */
4369 else if (!caller_tree)
4370 ret = false;
4371
4372 else
4373 {
4374 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4375 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4376
4377 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4 function
4378 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4379 function. */
4380 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4381 != callee_opts->x_ix86_isa_flags)
4382 ret = false;
4383
4384 /* See if we have the same non-isa options. */
4385 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4386 ret = false;
4387
4388 /* See if arch, tune, etc. are the same. */
4389 else if (caller_opts->arch != callee_opts->arch)
4390 ret = false;
4391
4392 else if (caller_opts->tune != callee_opts->tune)
4393 ret = false;
4394
4395 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4396 ret = false;
4397
4398 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4399 ret = false;
4400
4401 else
4402 ret = true;
4403 }
4404
4405 return ret;
4406 }
4407
4408 \f
4409 /* Remember the last target of ix86_set_current_function. */
4410 static GTY(()) tree ix86_previous_fndecl;
4411
4412 /* Establish appropriate back-end context for processing the function
4413 FNDECL. The argument might be NULL to indicate processing at top
4414 level, outside of any function scope. */
4415 static void
4416 ix86_set_current_function (tree fndecl)
4417 {
4418 /* Only change the context if the function changes. This hook is called
4419 several times in the course of compiling a function, and we don't want to
4420 slow things down too much or call target_reinit when it isn't safe. */
4421 if (fndecl && fndecl != ix86_previous_fndecl)
4422 {
4423 tree old_tree = (ix86_previous_fndecl
4424 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4425 : NULL_TREE);
4426
4427 tree new_tree = (fndecl
4428 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4429 : NULL_TREE);
4430
4431 ix86_previous_fndecl = fndecl;
4432 if (old_tree == new_tree)
4433 ;
4434
4435 else if (new_tree)
4436 {
4437 cl_target_option_restore (&global_options,
4438 TREE_TARGET_OPTION (new_tree));
4439 target_reinit ();
4440 }
4441
4442 else if (old_tree)
4443 {
4444 struct cl_target_option *def
4445 = TREE_TARGET_OPTION (target_option_current_node);
4446
4447 cl_target_option_restore (&global_options, def);
4448 target_reinit ();
4449 }
4450 }
4451 }
4452
4453 \f
4454 /* Return true if this goes in large data/bss. */
4455
4456 static bool
4457 ix86_in_large_data_p (tree exp)
4458 {
4459 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4460 return false;
4461
4462 /* Functions are never large data. */
4463 if (TREE_CODE (exp) == FUNCTION_DECL)
4464 return false;
4465
4466 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4467 {
4468 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4469 if (strcmp (section, ".ldata") == 0
4470 || strcmp (section, ".lbss") == 0)
4471 return true;
4472 return false;
4473 }
4474 else
4475 {
4476 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4477
4478 /* If this is an incomplete type with size 0, then we can't put it
4479 in data because it might be too big when completed. */
4480 if (!size || size > ix86_section_threshold)
4481 return true;
4482 }
4483
4484 return false;
4485 }
4486
4487 /* Switch to the appropriate section for output of DECL.
4488 DECL is either a `VAR_DECL' node or a constant of some sort.
4489 RELOC indicates whether forming the initial value of DECL requires
4490 link-time relocations. */
4491
4492 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4493 ATTRIBUTE_UNUSED;
4494
4495 static section *
4496 x86_64_elf_select_section (tree decl, int reloc,
4497 unsigned HOST_WIDE_INT align)
4498 {
4499 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4500 && ix86_in_large_data_p (decl))
4501 {
4502 const char *sname = NULL;
4503 unsigned int flags = SECTION_WRITE;
4504 switch (categorize_decl_for_section (decl, reloc))
4505 {
4506 case SECCAT_DATA:
4507 sname = ".ldata";
4508 break;
4509 case SECCAT_DATA_REL:
4510 sname = ".ldata.rel";
4511 break;
4512 case SECCAT_DATA_REL_LOCAL:
4513 sname = ".ldata.rel.local";
4514 break;
4515 case SECCAT_DATA_REL_RO:
4516 sname = ".ldata.rel.ro";
4517 break;
4518 case SECCAT_DATA_REL_RO_LOCAL:
4519 sname = ".ldata.rel.ro.local";
4520 break;
4521 case SECCAT_BSS:
4522 sname = ".lbss";
4523 flags |= SECTION_BSS;
4524 break;
4525 case SECCAT_RODATA:
4526 case SECCAT_RODATA_MERGE_STR:
4527 case SECCAT_RODATA_MERGE_STR_INIT:
4528 case SECCAT_RODATA_MERGE_CONST:
4529 sname = ".lrodata";
4530 flags = 0;
4531 break;
4532 case SECCAT_SRODATA:
4533 case SECCAT_SDATA:
4534 case SECCAT_SBSS:
4535 gcc_unreachable ();
4536 case SECCAT_TEXT:
4537 case SECCAT_TDATA:
4538 case SECCAT_TBSS:
4539 /* We don't split these for the medium model. Place them into
4540 default sections and hope for the best. */
4541 break;
4542 }
4543 if (sname)
4544 {
4545 /* We might get called with string constants, but get_named_section
4546 doesn't like them as they are not DECLs. Also, we need to set
4547 flags in that case. */
4548 if (!DECL_P (decl))
4549 return get_section (sname, flags, NULL);
4550 return get_named_section (decl, sname, reloc);
4551 }
4552 }
4553 return default_elf_select_section (decl, reloc, align);
4554 }
4555
4556 /* Build up a unique section name, expressed as a
4557 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4558 RELOC indicates whether the initial value of EXP requires
4559 link-time relocations. */
4560
4561 static void ATTRIBUTE_UNUSED
4562 x86_64_elf_unique_section (tree decl, int reloc)
4563 {
4564 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4565 && ix86_in_large_data_p (decl))
4566 {
4567 const char *prefix = NULL;
4568 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4569 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4570
4571 switch (categorize_decl_for_section (decl, reloc))
4572 {
4573 case SECCAT_DATA:
4574 case SECCAT_DATA_REL:
4575 case SECCAT_DATA_REL_LOCAL:
4576 case SECCAT_DATA_REL_RO:
4577 case SECCAT_DATA_REL_RO_LOCAL:
4578 prefix = one_only ? ".ld" : ".ldata";
4579 break;
4580 case SECCAT_BSS:
4581 prefix = one_only ? ".lb" : ".lbss";
4582 break;
4583 case SECCAT_RODATA:
4584 case SECCAT_RODATA_MERGE_STR:
4585 case SECCAT_RODATA_MERGE_STR_INIT:
4586 case SECCAT_RODATA_MERGE_CONST:
4587 prefix = one_only ? ".lr" : ".lrodata";
4588 break;
4589 case SECCAT_SRODATA:
4590 case SECCAT_SDATA:
4591 case SECCAT_SBSS:
4592 gcc_unreachable ();
4593 case SECCAT_TEXT:
4594 case SECCAT_TDATA:
4595 case SECCAT_TBSS:
4596 /* We don't split these for the medium model. Place them into
4597 default sections and hope for the best. */
4598 break;
4599 }
4600 if (prefix)
4601 {
4602 const char *name, *linkonce;
4603 char *string;
4604
4605 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4606 name = targetm.strip_name_encoding (name);
4607
4608 /* If we're using one_only, then there needs to be a .gnu.linkonce
4609 prefix to the section name. */
4610 linkonce = one_only ? ".gnu.linkonce" : "";
4611
4612 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4613
4614 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4615 return;
4616 }
4617 }
4618 default_unique_section (decl, reloc);
4619 }
4620
4621 #ifdef COMMON_ASM_OP
4622 /* This says how to output assembler code to declare an
4623 uninitialized external linkage data object.
4624
4625 For medium model x86-64 we need to use the .largecomm directive for
4626 large objects. */
4627 void
4628 x86_elf_aligned_common (FILE *file,
4629 const char *name, unsigned HOST_WIDE_INT size,
4630 int align)
4631 {
4632 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4633 && size > (unsigned int)ix86_section_threshold)
4634 fputs (".largecomm\t", file);
4635 else
4636 fputs (COMMON_ASM_OP, file);
4637 assemble_name (file, name);
4638 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4639 size, align / BITS_PER_UNIT);
4640 }
4641 #endif
4642
4643 /* Utility function for targets to use in implementing
4644 ASM_OUTPUT_ALIGNED_BSS. */
4645
4646 void
4647 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4648 const char *name, unsigned HOST_WIDE_INT size,
4649 int align)
4650 {
4651 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4652 && size > (unsigned int)ix86_section_threshold)
4653 switch_to_section (get_named_section (decl, ".lbss", 0));
4654 else
4655 switch_to_section (bss_section);
4656 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4657 #ifdef ASM_DECLARE_OBJECT_NAME
4658 last_assemble_variable_decl = decl;
4659 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4660 #else
4661 /* The standard thing is to just output a label for the object. */
4662 ASM_OUTPUT_LABEL (file, name);
4663 #endif /* ASM_DECLARE_OBJECT_NAME */
4664 ASM_OUTPUT_SKIP (file, size ? size : 1);
4665 }
4666 \f
4667 /* Decide whether we must probe the stack before any space allocation
4668 on this target. It's essentially TARGET_STACK_PROBE except when
4669 -fstack-check causes the stack to be already probed differently. */
4670
4671 bool
4672 ix86_target_stack_probe (void)
4673 {
4674 /* Do not probe the stack twice if static stack checking is enabled. */
4675 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4676 return false;
4677
4678 return TARGET_STACK_PROBE;
4679 }
4680 \f
4681 /* Decide whether we can make a sibling call to a function. DECL is the
4682 declaration of the function being targeted by the call and EXP is the
4683 CALL_EXPR representing the call. */
4684
4685 static bool
4686 ix86_function_ok_for_sibcall (tree decl, tree exp)
4687 {
4688 tree type, decl_or_type;
4689 rtx a, b;
4690
4691 /* If we are generating position-independent code, we cannot sibcall
4692 optimize any indirect call, or a direct call to a global function,
4693 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4694 if (!TARGET_MACHO
4695 && !TARGET_64BIT
4696 && flag_pic
4697 && (!decl || !targetm.binds_local_p (decl)))
4698 return false;
4699
4700 /* If we need to align the outgoing stack, then sibcalling would
4701 unalign the stack, which may break the called function. */
4702 if (ix86_minimum_incoming_stack_boundary (true)
4703 < PREFERRED_STACK_BOUNDARY)
4704 return false;
4705
4706 if (decl)
4707 {
4708 decl_or_type = decl;
4709 type = TREE_TYPE (decl);
4710 }
4711 else
4712 {
4713 /* We're looking at the CALL_EXPR, we need the type of the function. */
4714 type = CALL_EXPR_FN (exp); /* pointer expression */
4715 type = TREE_TYPE (type); /* pointer type */
4716 type = TREE_TYPE (type); /* function type */
4717 decl_or_type = type;
4718 }
4719
4720 /* Check that the return value locations are the same. For example,
4721 if we are returning floats on the 80387 register stack, we cannot
4722 make a sibcall from a function that doesn't return a float to a
4723 function that does or, conversely, from a function that does return
4724 a float to a function that doesn't; the necessary stack adjustment
4725 would not be executed. This is also the place we notice
4726 differences in the return value ABI. Note that it is ok for one
4727 of the functions to have void return type as long as the return
4728 value of the other is passed in a register. */
4729 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4730 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4731 cfun->decl, false);
4732 if (STACK_REG_P (a) || STACK_REG_P (b))
4733 {
4734 if (!rtx_equal_p (a, b))
4735 return false;
4736 }
4737 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4738 {
4739 /* Disable sibcall if we need to generate vzeroupper after
4740 callee returns. */
4741 if (TARGET_VZEROUPPER
4742 && cfun->machine->callee_return_avx256_p
4743 && !cfun->machine->caller_return_avx256_p)
4744 return false;
4745 }
4746 else if (!rtx_equal_p (a, b))
4747 return false;
4748
4749 if (TARGET_64BIT)
4750 {
4751 /* The SYSV ABI has more call-clobbered registers;
4752 disallow sibcalls from MS to SYSV. */
4753 if (cfun->machine->call_abi == MS_ABI
4754 && ix86_function_type_abi (type) == SYSV_ABI)
4755 return false;
4756 }
4757 else
4758 {
4759 /* If this call is indirect, we'll need to be able to use a
4760 call-clobbered register for the address of the target function.
4761 Make sure that all such registers are not used for passing
4762 parameters. Note that DLLIMPORT functions are indirect. */
4763 if (!decl
4764 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4765 {
4766 if (ix86_function_regparm (type, NULL) >= 3)
4767 {
4768 /* ??? Need to count the actual number of registers to be used,
4769 not the possible number of registers. Fix later. */
4770 return false;
4771 }
4772 }
4773 }
4774
4775 /* Otherwise okay. That also includes certain types of indirect calls. */
4776 return true;
4777 }
4778
4779 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4780 and "sseregparm" calling convention attributes;
4781 arguments as in struct attribute_spec.handler. */
4782
4783 static tree
4784 ix86_handle_cconv_attribute (tree *node, tree name,
4785 tree args,
4786 int flags ATTRIBUTE_UNUSED,
4787 bool *no_add_attrs)
4788 {
4789 if (TREE_CODE (*node) != FUNCTION_TYPE
4790 && TREE_CODE (*node) != METHOD_TYPE
4791 && TREE_CODE (*node) != FIELD_DECL
4792 && TREE_CODE (*node) != TYPE_DECL)
4793 {
4794 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4795 name);
4796 *no_add_attrs = true;
4797 return NULL_TREE;
4798 }
4799
4800 /* Can combine regparm with all attributes but fastcall and thiscall. */
4801 if (is_attribute_p ("regparm", name))
4802 {
4803 tree cst;
4804
4805 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4806 {
4807 error ("fastcall and regparm attributes are not compatible");
4808 }
4809
4810 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4811 {
4812 error ("regparm and thiscall attributes are not compatible");
4813 }
4814
4815 cst = TREE_VALUE (args);
4816 if (TREE_CODE (cst) != INTEGER_CST)
4817 {
4818 warning (OPT_Wattributes,
4819 "%qE attribute requires an integer constant argument",
4820 name);
4821 *no_add_attrs = true;
4822 }
4823 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4824 {
4825 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4826 name, REGPARM_MAX);
4827 *no_add_attrs = true;
4828 }
4829
4830 return NULL_TREE;
4831 }
4832
4833 if (TARGET_64BIT)
4834 {
4835 /* Do not warn when emulating the MS ABI. */
4836 if ((TREE_CODE (*node) != FUNCTION_TYPE
4837 && TREE_CODE (*node) != METHOD_TYPE)
4838 || ix86_function_type_abi (*node) != MS_ABI)
4839 warning (OPT_Wattributes, "%qE attribute ignored",
4840 name);
4841 *no_add_attrs = true;
4842 return NULL_TREE;
4843 }
4844
4845 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4846 if (is_attribute_p ("fastcall", name))
4847 {
4848 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4849 {
4850 error ("fastcall and cdecl attributes are not compatible");
4851 }
4852 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4853 {
4854 error ("fastcall and stdcall attributes are not compatible");
4855 }
4856 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4857 {
4858 error ("fastcall and regparm attributes are not compatible");
4859 }
4860 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4861 {
4862 error ("fastcall and thiscall attributes are not compatible");
4863 }
4864 }
4865
4866 /* Can combine stdcall with fastcall (redundant), regparm and
4867 sseregparm. */
4868 else if (is_attribute_p ("stdcall", name))
4869 {
4870 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4871 {
4872 error ("stdcall and cdecl attributes are not compatible");
4873 }
4874 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4875 {
4876 error ("stdcall and fastcall attributes are not compatible");
4877 }
4878 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4879 {
4880 error ("stdcall and thiscall attributes are not compatible");
4881 }
4882 }
4883
4884 /* Can combine cdecl with regparm and sseregparm. */
4885 else if (is_attribute_p ("cdecl", name))
4886 {
4887 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4888 {
4889 error ("stdcall and cdecl attributes are not compatible");
4890 }
4891 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4892 {
4893 error ("fastcall and cdecl attributes are not compatible");
4894 }
4895 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4896 {
4897 error ("cdecl and thiscall attributes are not compatible");
4898 }
4899 }
4900 else if (is_attribute_p ("thiscall", name))
4901 {
4902 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4903 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4904 name);
4905 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4906 {
4907 error ("stdcall and thiscall attributes are not compatible");
4908 }
4909 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4910 {
4911 error ("fastcall and thiscall attributes are not compatible");
4912 }
4913 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4914 {
4915 error ("cdecl and thiscall attributes are not compatible");
4916 }
4917 }
4918
4919 /* Can combine sseregparm with all attributes. */
4920
4921 return NULL_TREE;
4922 }
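
/* For illustration only (not part of the original sources): the checks
   above reject declarations that mix incompatible conventions, e.g.

     void __attribute__ ((fastcall, cdecl)) f (int);       rejected
     void __attribute__ ((stdcall, regparm (2))) g (int);  accepted

   while a regparm value larger than REGPARM_MAX only draws a warning
   and the attribute is not added.  */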
4923
4924 /* This function determines from TYPE the calling-convention. */
4925
4926 unsigned int
4927 ix86_get_callcvt (const_tree type)
4928 {
4929 unsigned int ret = 0;
4930 bool is_stdarg;
4931 tree attrs;
4932
4933 if (TARGET_64BIT)
4934 return IX86_CALLCVT_CDECL;
4935
4936 attrs = TYPE_ATTRIBUTES (type);
4937 if (attrs != NULL_TREE)
4938 {
4939 if (lookup_attribute ("cdecl", attrs))
4940 ret |= IX86_CALLCVT_CDECL;
4941 else if (lookup_attribute ("stdcall", attrs))
4942 ret |= IX86_CALLCVT_STDCALL;
4943 else if (lookup_attribute ("fastcall", attrs))
4944 ret |= IX86_CALLCVT_FASTCALL;
4945 else if (lookup_attribute ("thiscall", attrs))
4946 ret |= IX86_CALLCVT_THISCALL;
4947
4948 /* Regparm isn't allowed for thiscall and fastcall. */
4949 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
4950 {
4951 if (lookup_attribute ("regparm", attrs))
4952 ret |= IX86_CALLCVT_REGPARM;
4953 if (lookup_attribute ("sseregparm", attrs))
4954 ret |= IX86_CALLCVT_SSEREGPARM;
4955 }
4956
4957 if (IX86_BASE_CALLCVT (ret) != 0)
4958 return ret;
4959 }
4960
4961 is_stdarg = stdarg_p (type);
4962 if (TARGET_RTD && !is_stdarg)
4963 return IX86_CALLCVT_STDCALL | ret;
4964
4965 if (ret != 0
4966 || is_stdarg
4967 || TREE_CODE (type) != METHOD_TYPE
4968 || ix86_function_type_abi (type) != MS_ABI)
4969 return IX86_CALLCVT_CDECL | ret;
4970
4971 return IX86_CALLCVT_THISCALL;
4972 }
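
/* A rough sketch (informal, not from the original sources) of how the
   bits above combine on a 32-bit target without -mrtd:

     int f (int);                               IX86_CALLCVT_CDECL
     int __attribute__ ((stdcall)) g (int);     IX86_CALLCVT_STDCALL
     int __attribute__ ((regparm (2))) h (int);
                                IX86_CALLCVT_CDECL | IX86_CALLCVT_REGPARM

   Any 64-bit target returns IX86_CALLCVT_CDECL unconditionally.  */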
4973
4974 /* Return 0 if the attributes for two types are incompatible, 1 if they
4975 are compatible, and 2 if they are nearly compatible (which causes a
4976 warning to be generated). */
4977
4978 static int
4979 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4980 {
4981 unsigned int ccvt1, ccvt2;
4982
4983 if (TREE_CODE (type1) != FUNCTION_TYPE
4984 && TREE_CODE (type1) != METHOD_TYPE)
4985 return 1;
4986
4987 ccvt1 = ix86_get_callcvt (type1);
4988 ccvt2 = ix86_get_callcvt (type2);
4989 if (ccvt1 != ccvt2)
4990 return 0;
4991 if (ix86_function_regparm (type1, NULL)
4992 != ix86_function_regparm (type2, NULL))
4993 return 0;
4994
4995 return 1;
4996 }
4997 \f
4998 /* Return the regparm value for a function with the indicated TYPE and DECL.
4999 DECL may be NULL when calling a function indirectly
5000 or considering a libcall. */
5001
5002 static int
5003 ix86_function_regparm (const_tree type, const_tree decl)
5004 {
5005 tree attr;
5006 int regparm;
5007 unsigned int ccvt;
5008
5009 if (TARGET_64BIT)
5010 return (ix86_function_type_abi (type) == SYSV_ABI
5011 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5012 ccvt = ix86_get_callcvt (type);
5013 regparm = ix86_regparm;
5014
5015 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5016 {
5017 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5018 if (attr)
5019 {
5020 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5021 return regparm;
5022 }
5023 }
5024 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5025 return 2;
5026 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5027 return 1;
5028
5029 /* Use register calling convention for local functions when possible. */
5030 if (decl
5031 && TREE_CODE (decl) == FUNCTION_DECL
5032 && optimize
5033 && !(profile_flag && !flag_fentry))
5034 {
5035 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5036 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5037 if (i && i->local && i->can_change_signature)
5038 {
5039 int local_regparm, globals = 0, regno;
5040
5041 /* Make sure no regparm register is taken by a
5042 fixed register variable. */
5043 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5044 if (fixed_regs[local_regparm])
5045 break;
5046
5047 /* We don't want to use regparm(3) for nested functions as
5048 these use a static chain pointer in the third argument. */
5049 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5050 local_regparm = 2;
5051
5052 /* In 32-bit mode save a register for the split stack. */
5053 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5054 local_regparm = 2;
5055
5056 /* Each fixed register usage increases register pressure,
5057 so fewer registers should be used for argument passing.
5058 This functionality can be overridden by an explicit
5059 regparm value. */
5060 for (regno = 0; regno <= DI_REG; regno++)
5061 if (fixed_regs[regno])
5062 globals++;
5063
5064 local_regparm
5065 = globals < local_regparm ? local_regparm - globals : 0;
5066
5067 if (local_regparm > regparm)
5068 regparm = local_regparm;
5069 }
5070 }
5071
5072 return regparm;
5073 }
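
/* For illustration only: with -m32 -O2 a declaration such as

     static int __attribute__ ((regparm (3))) add (int a, int b, int c);

   passes A, B and C in %eax, %edx and %ecx; without the attribute the
   local-function heuristic above may still promote a local function to
   register passing when no regparm register is fixed.  */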
5074
5075 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5076 DFmode (2) arguments in SSE registers for a function with the
5077 indicated TYPE and DECL. DECL may be NULL when calling a function
5078 indirectly or considering a libcall. Otherwise return 0. */
5079
5080 static int
5081 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5082 {
5083 gcc_assert (!TARGET_64BIT);
5084
5085 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5086 by the sseregparm attribute. */
5087 if (TARGET_SSEREGPARM
5088 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5089 {
5090 if (!TARGET_SSE)
5091 {
5092 if (warn)
5093 {
5094 if (decl)
5095 error ("calling %qD with attribute sseregparm without "
5096 "SSE/SSE2 enabled", decl);
5097 else
5098 error ("calling %qT with attribute sseregparm without "
5099 "SSE/SSE2 enabled", type);
5100 }
5101 return 0;
5102 }
5103
5104 return 2;
5105 }
5106
5107 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5108 (and DFmode for SSE2) arguments in SSE registers. */
5109 if (decl && TARGET_SSE_MATH && optimize
5110 && !(profile_flag && !flag_fentry))
5111 {
5112 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5113 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5114 if (i && i->local && i->can_change_signature)
5115 return TARGET_SSE2 ? 2 : 1;
5116 }
5117
5118 return 0;
5119 }
5120
5121 /* Return true if EAX is live at the start of the function. Used by
5122 ix86_expand_prologue to determine if we need special help before
5123 calling allocate_stack_worker. */
5124
5125 static bool
5126 ix86_eax_live_at_start_p (void)
5127 {
5128 /* Cheat. Don't bother working forward from ix86_function_regparm
5129 to the function type to whether an actual argument is located in
5130 eax. Instead just look at cfg info, which is still close enough
5131 to correct at this point. This gives false positives for broken
5132 functions that might use uninitialized data that happens to be
5133 allocated in eax, but who cares? */
5134 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5135 }
5136
5137 static bool
5138 ix86_keep_aggregate_return_pointer (tree fntype)
5139 {
5140 tree attr;
5141
5142 if (!TARGET_64BIT)
5143 {
5144 attr = lookup_attribute ("callee_pop_aggregate_return",
5145 TYPE_ATTRIBUTES (fntype));
5146 if (attr)
5147 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5148
5149 /* For the 32-bit MS ABI the default is to keep the aggregate
5150 return pointer. */
5151 if (ix86_function_type_abi (fntype) == MS_ABI)
5152 return true;
5153 }
5154 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5155 }
5156
5157 /* Value is the number of bytes of arguments automatically
5158 popped when returning from a subroutine call.
5159 FUNDECL is the declaration node of the function (as a tree),
5160 FUNTYPE is the data type of the function (as a tree),
5161 or for a library call it is an identifier node for the subroutine name.
5162 SIZE is the number of bytes of arguments passed on the stack.
5163
5164 On the 80386, the RTD insn may be used to pop them if the number
5165 of args is fixed, but if the number is variable then the caller
5166 must pop them all. RTD can't be used for library calls now
5167 because the library is compiled with the Unix compiler.
5168 Use of RTD is a selectable option, since it is incompatible with
5169 standard Unix calling sequences. If the option is not selected,
5170 the caller must always pop the args.
5171
5172 The attribute stdcall is equivalent to RTD on a per module basis. */
5173
5174 static int
5175 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5176 {
5177 unsigned int ccvt;
5178
5179 /* None of the 64-bit ABIs pop arguments. */
5180 if (TARGET_64BIT)
5181 return 0;
5182
5183 ccvt = ix86_get_callcvt (funtype);
5184
5185 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5186 | IX86_CALLCVT_THISCALL)) != 0
5187 && ! stdarg_p (funtype))
5188 return size;
5189
5190 /* Lose any fake structure return argument if it is passed on the stack. */
5191 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5192 && !ix86_keep_aggregate_return_pointer (funtype))
5193 {
5194 int nregs = ix86_function_regparm (funtype, fundecl);
5195 if (nregs == 0)
5196 return GET_MODE_SIZE (Pmode);
5197 }
5198
5199 return 0;
5200 }
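
/* For illustration only: on a 32-bit target a declaration like

     void __attribute__ ((stdcall)) f (int, int);

   makes the callee pop its 8 bytes of stack arguments (ret $8), so the
   hook above returns SIZE, while a cdecl callee returns 0 and leaves
   the popping to the caller.  */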
5201 \f
5202 /* Argument support functions. */
5203
5204 /* Return true when register may be used to pass function parameters. */
5205 bool
5206 ix86_function_arg_regno_p (int regno)
5207 {
5208 int i;
5209 const int *parm_regs;
5210
5211 if (!TARGET_64BIT)
5212 {
5213 if (TARGET_MACHO)
5214 return (regno < REGPARM_MAX
5215 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5216 else
5217 return (regno < REGPARM_MAX
5218 || (TARGET_MMX && MMX_REGNO_P (regno)
5219 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5220 || (TARGET_SSE && SSE_REGNO_P (regno)
5221 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5222 }
5223
5224 if (TARGET_MACHO)
5225 {
5226 if (SSE_REGNO_P (regno) && TARGET_SSE)
5227 return true;
5228 }
5229 else
5230 {
5231 if (TARGET_SSE && SSE_REGNO_P (regno)
5232 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5233 return true;
5234 }
5235
5236 /* TODO: The function should depend on the current function's ABI, but
5237 builtins.c would need updating then. Therefore we use the
5238 default ABI. */
5239
5240 /* RAX is used as hidden argument to va_arg functions. */
5241 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5242 return true;
5243
5244 if (ix86_abi == MS_ABI)
5245 parm_regs = x86_64_ms_abi_int_parameter_registers;
5246 else
5247 parm_regs = x86_64_int_parameter_registers;
5248 for (i = 0; i < (ix86_abi == MS_ABI
5249 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5250 if (regno == parm_regs[i])
5251 return true;
5252 return false;
5253 }
5254
5255 /* Return true if we do not know how to pass TYPE solely in registers. */
5256
5257 static bool
5258 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5259 {
5260 if (must_pass_in_stack_var_size_or_pad (mode, type))
5261 return true;
5262
5263 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5264 The layout_type routine is crafty and tries to trick us into passing
5265 currently unsupported vector types on the stack by using TImode. */
5266 return (!TARGET_64BIT && mode == TImode
5267 && type && TREE_CODE (type) != VECTOR_TYPE);
5268 }
5269
5270 /* Return the size, in bytes, of the area reserved for arguments passed
5271 in registers for the function represented by FNDECL, depending on the
5272 ABI format used. */
5273 int
5274 ix86_reg_parm_stack_space (const_tree fndecl)
5275 {
5276 enum calling_abi call_abi = SYSV_ABI;
5277 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5278 call_abi = ix86_function_abi (fndecl);
5279 else
5280 call_abi = ix86_function_type_abi (fndecl);
5281 if (TARGET_64BIT && call_abi == MS_ABI)
5282 return 32;
5283 return 0;
5284 }
5285
5286 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5287 call ABI used. */
5288 enum calling_abi
5289 ix86_function_type_abi (const_tree fntype)
5290 {
5291 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5292 {
5293 enum calling_abi abi = ix86_abi;
5294 if (abi == SYSV_ABI)
5295 {
5296 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5297 abi = MS_ABI;
5298 }
5299 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5300 abi = SYSV_ABI;
5301 return abi;
5302 }
5303 return ix86_abi;
5304 }
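
/* For illustration only: on x86_64 targets defaulting to SYSV_ABI a
   prototype such as

     int __attribute__ ((ms_abi)) wrapped (int, int);

   is reported as MS_ABI by the function above; sysv_abi has the
   opposite effect when the default ABI is MS_ABI (e.g. mingw-w64).  */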
5305
5306 static bool
5307 ix86_function_ms_hook_prologue (const_tree fn)
5308 {
5309 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5310 {
5311 if (decl_function_context (fn) != NULL_TREE)
5312 error_at (DECL_SOURCE_LOCATION (fn),
5313 "ms_hook_prologue is not compatible with nested function");
5314 else
5315 return true;
5316 }
5317 return false;
5318 }
5319
5320 static enum calling_abi
5321 ix86_function_abi (const_tree fndecl)
5322 {
5323 if (! fndecl)
5324 return ix86_abi;
5325 return ix86_function_type_abi (TREE_TYPE (fndecl));
5326 }
5327
5328 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
5329 call ABI used. */
5330 enum calling_abi
5331 ix86_cfun_abi (void)
5332 {
5333 if (! cfun)
5334 return ix86_abi;
5335 return cfun->machine->call_abi;
5336 }
5337
5338 /* Write the extra assembler code needed to declare a function properly. */
5339
5340 void
5341 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5342 tree decl)
5343 {
5344 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5345
5346 if (is_ms_hook)
5347 {
5348 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5349 unsigned int filler_cc = 0xcccccccc;
5350
5351 for (i = 0; i < filler_count; i += 4)
5352 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5353 }
5354
5355 #ifdef SUBTARGET_ASM_UNWIND_INIT
5356 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5357 #endif
5358
5359 ASM_OUTPUT_LABEL (asm_out_file, fname);
5360
5361 /* Output magic byte marker, if hot-patch attribute is set. */
5362 if (is_ms_hook)
5363 {
5364 if (TARGET_64BIT)
5365 {
5366 /* leaq [%rsp + 0], %rsp */
5367 asm_fprintf (asm_out_file, ASM_BYTE
5368 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5369 }
5370 else
5371 {
5372 /* movl.s %edi, %edi
5373 push %ebp
5374 movl.s %esp, %ebp */
5375 asm_fprintf (asm_out_file, ASM_BYTE
5376 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5377 }
5378 }
5379 }
5380
5381 /* regclass.c */
5382 extern void init_regs (void);
5383
5384 /* Implementation of the call ABI switching target hook. The call-used
5385 register sets specific to FNDECL are selected here. See also
5386 ix86_conditional_register_usage for more details. */
5387 void
5388 ix86_call_abi_override (const_tree fndecl)
5389 {
5390 if (fndecl == NULL_TREE)
5391 cfun->machine->call_abi = ix86_abi;
5392 else
5393 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5394 }
5395
5396 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
5397 Avoid expensive re-initialization of init_regs each time we switch function
5398 context, since this is needed only during RTL expansion. */
5399 static void
5400 ix86_maybe_switch_abi (void)
5401 {
5402 if (TARGET_64BIT &&
5403 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5404 reinit_regs ();
5405 }
5406
5407 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5408 for a call to a function whose data type is FNTYPE.
5409 For a library call, FNTYPE is 0. */
5410
5411 void
5412 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5413 tree fntype, /* tree ptr for function decl */
5414 rtx libname, /* SYMBOL_REF of library name or 0 */
5415 tree fndecl,
5416 int caller)
5417 {
5418 struct cgraph_local_info *i;
5419 tree fnret_type;
5420
5421 memset (cum, 0, sizeof (*cum));
5422
5423 /* Initialize for the current callee. */
5424 if (caller)
5425 {
5426 cfun->machine->callee_pass_avx256_p = false;
5427 cfun->machine->callee_return_avx256_p = false;
5428 }
5429
5430 if (fndecl)
5431 {
5432 i = cgraph_local_info (fndecl);
5433 cum->call_abi = ix86_function_abi (fndecl);
5434 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5435 }
5436 else
5437 {
5438 i = NULL;
5439 cum->call_abi = ix86_function_type_abi (fntype);
5440 if (fntype)
5441 fnret_type = TREE_TYPE (fntype);
5442 else
5443 fnret_type = NULL;
5444 }
5445
5446 if (TARGET_VZEROUPPER && fnret_type)
5447 {
5448 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5449 false);
5450 if (function_pass_avx256_p (fnret_value))
5451 {
5452 /* The return value of this function uses 256bit AVX modes. */
5453 if (caller)
5454 cfun->machine->callee_return_avx256_p = true;
5455 else
5456 cfun->machine->caller_return_avx256_p = true;
5457 }
5458 }
5459
5460 cum->caller = caller;
5461
5462 /* Set up the number of registers to use for passing arguments. */
5463
5464 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5465 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5466 "or subtarget optimization implying it");
5467 cum->nregs = ix86_regparm;
5468 if (TARGET_64BIT)
5469 {
5470 cum->nregs = (cum->call_abi == SYSV_ABI
5471 ? X86_64_REGPARM_MAX
5472 : X86_64_MS_REGPARM_MAX);
5473 }
5474 if (TARGET_SSE)
5475 {
5476 cum->sse_nregs = SSE_REGPARM_MAX;
5477 if (TARGET_64BIT)
5478 {
5479 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5480 ? X86_64_SSE_REGPARM_MAX
5481 : X86_64_MS_SSE_REGPARM_MAX);
5482 }
5483 }
5484 if (TARGET_MMX)
5485 cum->mmx_nregs = MMX_REGPARM_MAX;
5486 cum->warn_avx = true;
5487 cum->warn_sse = true;
5488 cum->warn_mmx = true;
5489
5490 /* Because the type might differ between caller and callee, we need to
5491 use the actual function type for local calls.
5492 FIXME: cgraph_analyze can be told to actually record if a function uses
5493 va_start, so for local functions maybe_vaarg can be made more
5494 aggressive, helping K&R code.
5495 FIXME: once the type system is fixed, we won't need this code anymore. */
5496 if (i && i->local && i->can_change_signature)
5497 fntype = TREE_TYPE (fndecl);
5498 cum->maybe_vaarg = (fntype
5499 ? (!prototype_p (fntype) || stdarg_p (fntype))
5500 : !libname);
5501
5502 if (!TARGET_64BIT)
5503 {
5504 /* If there are variable arguments, then we won't pass anything
5505 in registers in 32-bit mode. */
5506 if (stdarg_p (fntype))
5507 {
5508 cum->nregs = 0;
5509 cum->sse_nregs = 0;
5510 cum->mmx_nregs = 0;
5511 cum->warn_avx = 0;
5512 cum->warn_sse = 0;
5513 cum->warn_mmx = 0;
5514 return;
5515 }
5516
5517 /* Use ecx and edx registers if function has fastcall attribute,
5518 else look for regparm information. */
5519 if (fntype)
5520 {
5521 unsigned int ccvt = ix86_get_callcvt (fntype);
5522 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5523 {
5524 cum->nregs = 1;
5525 cum->fastcall = 1; /* Same first register as in fastcall. */
5526 }
5527 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5528 {
5529 cum->nregs = 2;
5530 cum->fastcall = 1;
5531 }
5532 else
5533 cum->nregs = ix86_function_regparm (fntype, fndecl);
5534 }
5535
5536 /* Set up the number of SSE registers used for passing SFmode
5537 and DFmode arguments. Warn for mismatching ABI. */
5538 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5539 }
5540 }
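
/* For illustration only: for a 32-bit prototype

     int __attribute__ ((fastcall)) f (int a, int b, int c);

   the code above leaves CUM->nregs == 2 and CUM->fastcall == 1, so A
   and B are later assigned to %ecx and %edx while C goes on the stack;
   a stdarg prototype instead zeroes all register counts.  */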
5541
5542 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5543 But in the case of vector types, it is some vector mode.
5544
5545 When we have only some of our vector isa extensions enabled, then there
5546 are some modes for which vector_mode_supported_p is false. For these
5547 modes, the generic vector support in gcc will choose some non-vector mode
5548 in order to implement the type. By computing the natural mode, we'll
5549 select the proper ABI location for the operand and not depend on whatever
5550 the middle-end decides to do with these vector types.
5551
5552 The middle-end can't deal with vector types larger than 16 bytes. In
5553 this case, we return the original mode and warn of the ABI change if
5554 CUM isn't NULL. */
5555
5556 static enum machine_mode
5557 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5558 {
5559 enum machine_mode mode = TYPE_MODE (type);
5560
5561 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5562 {
5563 HOST_WIDE_INT size = int_size_in_bytes (type);
5564 if ((size == 8 || size == 16 || size == 32)
5565 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5566 && TYPE_VECTOR_SUBPARTS (type) > 1)
5567 {
5568 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5569
5570 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5571 mode = MIN_MODE_VECTOR_FLOAT;
5572 else
5573 mode = MIN_MODE_VECTOR_INT;
5574
5575 /* Get the mode which has this inner mode and number of units. */
5576 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5577 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5578 && GET_MODE_INNER (mode) == innermode)
5579 {
5580 if (size == 32 && !TARGET_AVX)
5581 {
5582 static bool warnedavx;
5583
5584 if (cum
5585 && !warnedavx
5586 && cum->warn_avx)
5587 {
5588 warnedavx = true;
5589 warning (0, "AVX vector argument without AVX "
5590 "enabled changes the ABI");
5591 }
5592 return TYPE_MODE (type);
5593 }
5594 else
5595 return mode;
5596 }
5597
5598 gcc_unreachable ();
5599 }
5600 }
5601
5602 return mode;
5603 }
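
/* For illustration only: a user type such as

     typedef float v8sf __attribute__ ((vector_size (32)));

   maps to V8SFmode here when AVX is enabled; without -mavx the function
   warns once that the ABI changes and falls back to TYPE_MODE.  */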
5604
5605 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5606 this may not agree with the mode that the type system has chosen for the
5607 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5608 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5609
5610 static rtx
5611 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5612 unsigned int regno)
5613 {
5614 rtx tmp;
5615
5616 if (orig_mode != BLKmode)
5617 tmp = gen_rtx_REG (orig_mode, regno);
5618 else
5619 {
5620 tmp = gen_rtx_REG (mode, regno);
5621 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5622 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5623 }
5624
5625 return tmp;
5626 }
5627
5628 /* x86-64 register passing implementation. See the x86-64 ABI for details.
5629 The goal of this code is to classify each eightbyte of an incoming argument
5630 by register class and assign registers accordingly. */
5631
5632 /* Return the union class of CLASS1 and CLASS2.
5633 See the x86-64 PS ABI for details. */
5634
5635 static enum x86_64_reg_class
5636 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5637 {
5638 /* Rule #1: If both classes are equal, this is the resulting class. */
5639 if (class1 == class2)
5640 return class1;
5641
5642 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5643 the other class. */
5644 if (class1 == X86_64_NO_CLASS)
5645 return class2;
5646 if (class2 == X86_64_NO_CLASS)
5647 return class1;
5648
5649 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5650 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5651 return X86_64_MEMORY_CLASS;
5652
5653 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5654 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5655 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5656 return X86_64_INTEGERSI_CLASS;
5657 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5658 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5659 return X86_64_INTEGER_CLASS;
5660
5661 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5662 MEMORY is used. */
5663 if (class1 == X86_64_X87_CLASS
5664 || class1 == X86_64_X87UP_CLASS
5665 || class1 == X86_64_COMPLEX_X87_CLASS
5666 || class2 == X86_64_X87_CLASS
5667 || class2 == X86_64_X87UP_CLASS
5668 || class2 == X86_64_COMPLEX_X87_CLASS)
5669 return X86_64_MEMORY_CLASS;
5670
5671 /* Rule #6: Otherwise class SSE is used. */
5672 return X86_64_SSE_CLASS;
5673 }
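
/* A worked example (informal, not part of the original sources): for

     struct s { int i; float f; };

   the single eightbyte merges X86_64_INTEGERSI_CLASS (from the int)
   with X86_64_SSE_CLASS (from the float at offset 4), which rule #4
   resolves to X86_64_INTEGER_CLASS, so the whole struct is passed in
   one general-purpose register.  */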
5674
5675 /* Classify the argument of type TYPE and mode MODE.
5676 CLASSES will be filled by the register class used to pass each word
5677 of the operand. The number of words is returned. In case the parameter
5678 should be passed in memory, 0 is returned. As a special case for zero
5679 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5680
5681 BIT_OFFSET is used internally for handling records and specifies the
5682 offset in bits modulo 256 to avoid overflow cases.
5683
5684 See the x86-64 PS ABI for details.
5685 */
5686
5687 static int
5688 classify_argument (enum machine_mode mode, const_tree type,
5689 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5690 {
5691 HOST_WIDE_INT bytes =
5692 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5693 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5694
5695 /* Variable sized entities are always passed/returned in memory. */
5696 if (bytes < 0)
5697 return 0;
5698
5699 if (mode != VOIDmode
5700 && targetm.calls.must_pass_in_stack (mode, type))
5701 return 0;
5702
5703 if (type && AGGREGATE_TYPE_P (type))
5704 {
5705 int i;
5706 tree field;
5707 enum x86_64_reg_class subclasses[MAX_CLASSES];
5708
5709 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5710 if (bytes > 32)
5711 return 0;
5712
5713 for (i = 0; i < words; i++)
5714 classes[i] = X86_64_NO_CLASS;
5715
5716 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
5717 signal the memory class, so handle them as a special case. */
5718 if (!words)
5719 {
5720 classes[0] = X86_64_NO_CLASS;
5721 return 1;
5722 }
5723
5724 /* Classify each field of record and merge classes. */
5725 switch (TREE_CODE (type))
5726 {
5727 case RECORD_TYPE:
5728 /* And now merge the fields of structure. */
5729 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5730 {
5731 if (TREE_CODE (field) == FIELD_DECL)
5732 {
5733 int num;
5734
5735 if (TREE_TYPE (field) == error_mark_node)
5736 continue;
5737
5738 /* Bitfields are always classified as integer. Handle them
5739 early, since later code would consider them to be
5740 misaligned integers. */
5741 if (DECL_BIT_FIELD (field))
5742 {
5743 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5744 i < ((int_bit_position (field) + (bit_offset % 64))
5745 + tree_low_cst (DECL_SIZE (field), 0)
5746 + 63) / 8 / 8; i++)
5747 classes[i] =
5748 merge_classes (X86_64_INTEGER_CLASS,
5749 classes[i]);
5750 }
5751 else
5752 {
5753 int pos;
5754
5755 type = TREE_TYPE (field);
5756
5757 /* Flexible array member is ignored. */
5758 if (TYPE_MODE (type) == BLKmode
5759 && TREE_CODE (type) == ARRAY_TYPE
5760 && TYPE_SIZE (type) == NULL_TREE
5761 && TYPE_DOMAIN (type) != NULL_TREE
5762 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5763 == NULL_TREE))
5764 {
5765 static bool warned;
5766
5767 if (!warned && warn_psabi)
5768 {
5769 warned = true;
5770 inform (input_location,
5771 "the ABI of passing struct with"
5772 " a flexible array member has"
5773 " changed in GCC 4.4");
5774 }
5775 continue;
5776 }
5777 num = classify_argument (TYPE_MODE (type), type,
5778 subclasses,
5779 (int_bit_position (field)
5780 + bit_offset) % 256);
5781 if (!num)
5782 return 0;
5783 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5784 for (i = 0; i < num && (i + pos) < words; i++)
5785 classes[i + pos] =
5786 merge_classes (subclasses[i], classes[i + pos]);
5787 }
5788 }
5789 }
5790 break;
5791
5792 case ARRAY_TYPE:
5793 /* Arrays are handled as small records. */
5794 {
5795 int num;
5796 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5797 TREE_TYPE (type), subclasses, bit_offset);
5798 if (!num)
5799 return 0;
5800
5801 /* The partial classes are now full classes. */
5802 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5803 subclasses[0] = X86_64_SSE_CLASS;
5804 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5805 && !((bit_offset % 64) == 0 && bytes == 4))
5806 subclasses[0] = X86_64_INTEGER_CLASS;
5807
5808 for (i = 0; i < words; i++)
5809 classes[i] = subclasses[i % num];
5810
5811 break;
5812 }
5813 case UNION_TYPE:
5814 case QUAL_UNION_TYPE:
5815 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5817 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5818 {
5819 if (TREE_CODE (field) == FIELD_DECL)
5820 {
5821 int num;
5822
5823 if (TREE_TYPE (field) == error_mark_node)
5824 continue;
5825
5826 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5827 TREE_TYPE (field), subclasses,
5828 bit_offset);
5829 if (!num)
5830 return 0;
5831 for (i = 0; i < num; i++)
5832 classes[i] = merge_classes (subclasses[i], classes[i]);
5833 }
5834 }
5835 break;
5836
5837 default:
5838 gcc_unreachable ();
5839 }
5840
5841 if (words > 2)
5842 {
5843 /* When the size is greater than 16 bytes, if the first eightbyte
5844 isn't X86_64_SSE_CLASS or any of the remaining eightbytes isn't
5845 X86_64_SSEUP_CLASS, everything should be passed in
5846 memory. */
5847 if (classes[0] != X86_64_SSE_CLASS)
5848 return 0;
5849
5850 for (i = 1; i < words; i++)
5851 if (classes[i] != X86_64_SSEUP_CLASS)
5852 return 0;
5853 }
5854
5855 /* Final merger cleanup. */
5856 for (i = 0; i < words; i++)
5857 {
5858 /* If one class is MEMORY, everything should be passed in
5859 memory. */
5860 if (classes[i] == X86_64_MEMORY_CLASS)
5861 return 0;
5862
5863 /* X86_64_SSEUP_CLASS should always be preceded by
5864 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5865 if (classes[i] == X86_64_SSEUP_CLASS
5866 && classes[i - 1] != X86_64_SSE_CLASS
5867 && classes[i - 1] != X86_64_SSEUP_CLASS)
5868 {
5869 /* The first one should never be X86_64_SSEUP_CLASS. */
5870 gcc_assert (i != 0);
5871 classes[i] = X86_64_SSE_CLASS;
5872 }
5873
5874 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5875 everything should be passed in memory. */
5876 if (classes[i] == X86_64_X87UP_CLASS
5877 && (classes[i - 1] != X86_64_X87_CLASS))
5878 {
5879 static bool warned;
5880
5881 /* The first one should never be X86_64_X87UP_CLASS. */
5882 gcc_assert (i != 0);
5883 if (!warned && warn_psabi)
5884 {
5885 warned = true;
5886 inform (input_location,
5887 "the ABI of passing union with long double"
5888 " has changed in GCC 4.4");
5889 }
5890 return 0;
5891 }
5892 }
5893 return words;
5894 }
5895
5896 /* Compute the alignment needed. We align all types to natural boundaries,
5897 with the exception of XFmode, which is aligned to 64 bits. */
5898 if (mode != VOIDmode && mode != BLKmode)
5899 {
5900 int mode_alignment = GET_MODE_BITSIZE (mode);
5901
5902 if (mode == XFmode)
5903 mode_alignment = 128;
5904 else if (mode == XCmode)
5905 mode_alignment = 256;
5906 if (COMPLEX_MODE_P (mode))
5907 mode_alignment /= 2;
5908 /* Misaligned fields are always returned in memory. */
5909 if (bit_offset % mode_alignment)
5910 return 0;
5911 }
5912
5913 /* For V1xx modes, just use the base mode. */
5914 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5915 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5916 mode = GET_MODE_INNER (mode);
5917
5918 /* Classification of atomic types. */
5919 switch (mode)
5920 {
5921 case SDmode:
5922 case DDmode:
5923 classes[0] = X86_64_SSE_CLASS;
5924 return 1;
5925 case TDmode:
5926 classes[0] = X86_64_SSE_CLASS;
5927 classes[1] = X86_64_SSEUP_CLASS;
5928 return 2;
5929 case DImode:
5930 case SImode:
5931 case HImode:
5932 case QImode:
5933 case CSImode:
5934 case CHImode:
5935 case CQImode:
5936 {
5937 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5938
5939 if (size <= 32)
5940 {
5941 classes[0] = X86_64_INTEGERSI_CLASS;
5942 return 1;
5943 }
5944 else if (size <= 64)
5945 {
5946 classes[0] = X86_64_INTEGER_CLASS;
5947 return 1;
5948 }
5949 else if (size <= 64+32)
5950 {
5951 classes[0] = X86_64_INTEGER_CLASS;
5952 classes[1] = X86_64_INTEGERSI_CLASS;
5953 return 2;
5954 }
5955 else if (size <= 64+64)
5956 {
5957 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5958 return 2;
5959 }
5960 else
5961 gcc_unreachable ();
5962 }
5963 case CDImode:
5964 case TImode:
5965 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5966 return 2;
5967 case COImode:
5968 case OImode:
5969 /* OImode shouldn't be used directly. */
5970 gcc_unreachable ();
5971 case CTImode:
5972 return 0;
5973 case SFmode:
5974 if (!(bit_offset % 64))
5975 classes[0] = X86_64_SSESF_CLASS;
5976 else
5977 classes[0] = X86_64_SSE_CLASS;
5978 return 1;
5979 case DFmode:
5980 classes[0] = X86_64_SSEDF_CLASS;
5981 return 1;
5982 case XFmode:
5983 classes[0] = X86_64_X87_CLASS;
5984 classes[1] = X86_64_X87UP_CLASS;
5985 return 2;
5986 case TFmode:
5987 classes[0] = X86_64_SSE_CLASS;
5988 classes[1] = X86_64_SSEUP_CLASS;
5989 return 2;
5990 case SCmode:
5991 classes[0] = X86_64_SSE_CLASS;
5992 if (!(bit_offset % 64))
5993 return 1;
5994 else
5995 {
5996 static bool warned;
5997
5998 if (!warned && warn_psabi)
5999 {
6000 warned = true;
6001 inform (input_location,
6002 "the ABI of passing structure with complex float"
6003 " member has changed in GCC 4.4");
6004 }
6005 classes[1] = X86_64_SSESF_CLASS;
6006 return 2;
6007 }
6008 case DCmode:
6009 classes[0] = X86_64_SSEDF_CLASS;
6010 classes[1] = X86_64_SSEDF_CLASS;
6011 return 2;
6012 case XCmode:
6013 classes[0] = X86_64_COMPLEX_X87_CLASS;
6014 return 1;
6015 case TCmode:
6016 /* This mode is larger than 16 bytes. */
6017 return 0;
6018 case V8SFmode:
6019 case V8SImode:
6020 case V32QImode:
6021 case V16HImode:
6022 case V4DFmode:
6023 case V4DImode:
6024 classes[0] = X86_64_SSE_CLASS;
6025 classes[1] = X86_64_SSEUP_CLASS;
6026 classes[2] = X86_64_SSEUP_CLASS;
6027 classes[3] = X86_64_SSEUP_CLASS;
6028 return 4;
6029 case V4SFmode:
6030 case V4SImode:
6031 case V16QImode:
6032 case V8HImode:
6033 case V2DFmode:
6034 case V2DImode:
6035 classes[0] = X86_64_SSE_CLASS;
6036 classes[1] = X86_64_SSEUP_CLASS;
6037 return 2;
6038 case V1TImode:
6039 case V1DImode:
6040 case V2SFmode:
6041 case V2SImode:
6042 case V4HImode:
6043 case V8QImode:
6044 classes[0] = X86_64_SSE_CLASS;
6045 return 1;
6046 case BLKmode:
6047 case VOIDmode:
6048 return 0;
6049 default:
6050 gcc_assert (VECTOR_MODE_P (mode));
6051
6052 if (bytes > 16)
6053 return 0;
6054
6055 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6056
6057 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6058 classes[0] = X86_64_INTEGERSI_CLASS;
6059 else
6060 classes[0] = X86_64_INTEGER_CLASS;
6061 classes[1] = X86_64_INTEGER_CLASS;
6062 return 1 + (bytes > 8);
6063 }
6064 }
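
/* For illustration only, assuming the 64-bit SYSV ABI:

     struct p { double x; double y; };   classifies as { SSEDF, SSEDF }
     struct q { long l; double d; };     classifies as { INTEGER, SSEDF }

   so P travels in two SSE registers and Q in one general-purpose plus
   one SSE register, while any aggregate larger than 16 bytes that is
   not a pure SSE/SSEUP sequence classifies to 0, i.e. memory.  */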
6065
6066 /* Examine the argument and set the number of registers required in each
6067 class. Return 0 iff the parameter should be passed in memory. */
6068 static int
6069 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6070 int *int_nregs, int *sse_nregs)
6071 {
6072 enum x86_64_reg_class regclass[MAX_CLASSES];
6073 int n = classify_argument (mode, type, regclass, 0);
6074
6075 *int_nregs = 0;
6076 *sse_nregs = 0;
6077 if (!n)
6078 return 0;
6079 for (n--; n >= 0; n--)
6080 switch (regclass[n])
6081 {
6082 case X86_64_INTEGER_CLASS:
6083 case X86_64_INTEGERSI_CLASS:
6084 (*int_nregs)++;
6085 break;
6086 case X86_64_SSE_CLASS:
6087 case X86_64_SSESF_CLASS:
6088 case X86_64_SSEDF_CLASS:
6089 (*sse_nregs)++;
6090 break;
6091 case X86_64_NO_CLASS:
6092 case X86_64_SSEUP_CLASS:
6093 break;
6094 case X86_64_X87_CLASS:
6095 case X86_64_X87UP_CLASS:
6096 if (!in_return)
6097 return 0;
6098 break;
6099 case X86_64_COMPLEX_X87_CLASS:
6100 return in_return ? 2 : 0;
6101 case X86_64_MEMORY_CLASS:
6102 gcc_unreachable ();
6103 }
6104 return 1;
6105 }
6106
6107 /* Construct container for the argument used by GCC interface. See
6108 FUNCTION_ARG for the detailed description. */
6109
6110 static rtx
6111 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6112 const_tree type, int in_return, int nintregs, int nsseregs,
6113 const int *intreg, int sse_regno)
6114 {
6115 /* The following variables hold the static issued_error state. */
6116 static bool issued_sse_arg_error;
6117 static bool issued_sse_ret_error;
6118 static bool issued_x87_ret_error;
6119
6120 enum machine_mode tmpmode;
6121 int bytes =
6122 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6123 enum x86_64_reg_class regclass[MAX_CLASSES];
6124 int n;
6125 int i;
6126 int nexps = 0;
6127 int needed_sseregs, needed_intregs;
6128 rtx exp[MAX_CLASSES];
6129 rtx ret;
6130
6131 n = classify_argument (mode, type, regclass, 0);
6132 if (!n)
6133 return NULL;
6134 if (!examine_argument (mode, type, in_return, &needed_intregs,
6135 &needed_sseregs))
6136 return NULL;
6137 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6138 return NULL;
6139
6140 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6141 some less clueful developer tries to use floating-point anyway. */
6142 if (needed_sseregs && !TARGET_SSE)
6143 {
6144 if (in_return)
6145 {
6146 if (!issued_sse_ret_error)
6147 {
6148 error ("SSE register return with SSE disabled");
6149 issued_sse_ret_error = true;
6150 }
6151 }
6152 else if (!issued_sse_arg_error)
6153 {
6154 error ("SSE register argument with SSE disabled");
6155 issued_sse_arg_error = true;
6156 }
6157 return NULL;
6158 }
6159
6160 /* Likewise, error if the ABI requires us to return values in the
6161 x87 registers and the user specified -mno-80387. */
6162 if (!TARGET_80387 && in_return)
6163 for (i = 0; i < n; i++)
6164 if (regclass[i] == X86_64_X87_CLASS
6165 || regclass[i] == X86_64_X87UP_CLASS
6166 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6167 {
6168 if (!issued_x87_ret_error)
6169 {
6170 error ("x87 register return with x87 disabled");
6171 issued_x87_ret_error = true;
6172 }
6173 return NULL;
6174 }
6175
6176 /* First construct simple cases. Avoid SCmode, since we want to use
6177 single register to pass this type. */
6178 if (n == 1 && mode != SCmode)
6179 switch (regclass[0])
6180 {
6181 case X86_64_INTEGER_CLASS:
6182 case X86_64_INTEGERSI_CLASS:
6183 return gen_rtx_REG (mode, intreg[0]);
6184 case X86_64_SSE_CLASS:
6185 case X86_64_SSESF_CLASS:
6186 case X86_64_SSEDF_CLASS:
6187 if (mode != BLKmode)
6188 return gen_reg_or_parallel (mode, orig_mode,
6189 SSE_REGNO (sse_regno));
6190 break;
6191 case X86_64_X87_CLASS:
6192 case X86_64_COMPLEX_X87_CLASS:
6193 return gen_rtx_REG (mode, FIRST_STACK_REG);
6194 case X86_64_NO_CLASS:
6195 /* Zero sized array, struct or class. */
6196 return NULL;
6197 default:
6198 gcc_unreachable ();
6199 }
6200 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6201 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6202 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6203 if (n == 4
6204 && regclass[0] == X86_64_SSE_CLASS
6205 && regclass[1] == X86_64_SSEUP_CLASS
6206 && regclass[2] == X86_64_SSEUP_CLASS
6207 && regclass[3] == X86_64_SSEUP_CLASS
6208 && mode != BLKmode)
6209 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6210
6211 if (n == 2
6212 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6213 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6214 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6215 && regclass[1] == X86_64_INTEGER_CLASS
6216 && (mode == CDImode || mode == TImode || mode == TFmode)
6217 && intreg[0] + 1 == intreg[1])
6218 return gen_rtx_REG (mode, intreg[0]);
6219
6220 /* Otherwise figure out the entries of the PARALLEL. */
6221 for (i = 0; i < n; i++)
6222 {
6223 int pos;
6224
6225 switch (regclass[i])
6226 {
6227 case X86_64_NO_CLASS:
6228 break;
6229 case X86_64_INTEGER_CLASS:
6230 case X86_64_INTEGERSI_CLASS:
6231 /* Merge TImodes on aligned occasions here too. */
6232 if (i * 8 + 8 > bytes)
6233 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6234 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6235 tmpmode = SImode;
6236 else
6237 tmpmode = DImode;
6238 /* We've requested 24 bytes that we don't have a mode for. Use DImode. */
6239 if (tmpmode == BLKmode)
6240 tmpmode = DImode;
6241 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6242 gen_rtx_REG (tmpmode, *intreg),
6243 GEN_INT (i*8));
6244 intreg++;
6245 break;
6246 case X86_64_SSESF_CLASS:
6247 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6248 gen_rtx_REG (SFmode,
6249 SSE_REGNO (sse_regno)),
6250 GEN_INT (i*8));
6251 sse_regno++;
6252 break;
6253 case X86_64_SSEDF_CLASS:
6254 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6255 gen_rtx_REG (DFmode,
6256 SSE_REGNO (sse_regno)),
6257 GEN_INT (i*8));
6258 sse_regno++;
6259 break;
6260 case X86_64_SSE_CLASS:
6261 pos = i;
6262 switch (n)
6263 {
6264 case 1:
6265 tmpmode = DImode;
6266 break;
6267 case 2:
6268 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6269 {
6270 tmpmode = TImode;
6271 i++;
6272 }
6273 else
6274 tmpmode = DImode;
6275 break;
6276 case 4:
6277 gcc_assert (i == 0
6278 && regclass[1] == X86_64_SSEUP_CLASS
6279 && regclass[2] == X86_64_SSEUP_CLASS
6280 && regclass[3] == X86_64_SSEUP_CLASS);
6281 tmpmode = OImode;
6282 i += 3;
6283 break;
6284 default:
6285 gcc_unreachable ();
6286 }
6287 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6288 gen_rtx_REG (tmpmode,
6289 SSE_REGNO (sse_regno)),
6290 GEN_INT (pos*8));
6291 sse_regno++;
6292 break;
6293 default:
6294 gcc_unreachable ();
6295 }
6296 }
6297
6298 /* Empty aligned struct, union or class. */
6299 if (nexps == 0)
6300 return NULL;
6301
6302 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6303 for (i = 0; i < nexps; i++)
6304 XVECEXP (ret, 0, i) = exp [i];
6305 return ret;
6306 }
6307
6308 /* Update the data in CUM to advance over an argument of mode MODE
6309 and data type TYPE. (TYPE is null for libcalls where that information
6310 may not be available.) */
6311
6312 static void
6313 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6314 const_tree type, HOST_WIDE_INT bytes,
6315 HOST_WIDE_INT words)
6316 {
6317 switch (mode)
6318 {
6319 default:
6320 break;
6321
6322 case BLKmode:
6323 if (bytes < 0)
6324 break;
6325 /* FALLTHRU */
6326
6327 case DImode:
6328 case SImode:
6329 case HImode:
6330 case QImode:
6331 cum->words += words;
6332 cum->nregs -= words;
6333 cum->regno += words;
6334
6335 if (cum->nregs <= 0)
6336 {
6337 cum->nregs = 0;
6338 cum->regno = 0;
6339 }
6340 break;
6341
6342 case OImode:
6343 /* OImode shouldn't be used directly. */
6344 gcc_unreachable ();
6345
6346 case DFmode:
6347 if (cum->float_in_sse < 2)
6348 break;
6349 case SFmode:
6350 if (cum->float_in_sse < 1)
6351 break;
6352 /* FALLTHRU */
6353
6354 case V8SFmode:
6355 case V8SImode:
6356 case V32QImode:
6357 case V16HImode:
6358 case V4DFmode:
6359 case V4DImode:
6360 case TImode:
6361 case V16QImode:
6362 case V8HImode:
6363 case V4SImode:
6364 case V2DImode:
6365 case V4SFmode:
6366 case V2DFmode:
6367 if (!type || !AGGREGATE_TYPE_P (type))
6368 {
6369 cum->sse_words += words;
6370 cum->sse_nregs -= 1;
6371 cum->sse_regno += 1;
6372 if (cum->sse_nregs <= 0)
6373 {
6374 cum->sse_nregs = 0;
6375 cum->sse_regno = 0;
6376 }
6377 }
6378 break;
6379
6380 case V8QImode:
6381 case V4HImode:
6382 case V2SImode:
6383 case V2SFmode:
6384 case V1TImode:
6385 case V1DImode:
6386 if (!type || !AGGREGATE_TYPE_P (type))
6387 {
6388 cum->mmx_words += words;
6389 cum->mmx_nregs -= 1;
6390 cum->mmx_regno += 1;
6391 if (cum->mmx_nregs <= 0)
6392 {
6393 cum->mmx_nregs = 0;
6394 cum->mmx_regno = 0;
6395 }
6396 }
6397 break;
6398 }
6399 }
6400
6401 static void
6402 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6403 const_tree type, HOST_WIDE_INT words, bool named)
6404 {
6405 int int_nregs, sse_nregs;
6406
6407 /* Unnamed 256bit vector mode parameters are passed on stack. */
6408 if (!named && VALID_AVX256_REG_MODE (mode))
6409 return;
6410
6411 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6412 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6413 {
6414 cum->nregs -= int_nregs;
6415 cum->sse_nregs -= sse_nregs;
6416 cum->regno += int_nregs;
6417 cum->sse_regno += sse_nregs;
6418 }
6419 else
6420 {
6421 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6422 cum->words = (cum->words + align - 1) & ~(align - 1);
6423 cum->words += words;
6424 }
6425 }
6426
6427 static void
6428 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6429 HOST_WIDE_INT words)
6430 {
6431 /* Otherwise, this should be passed indirectly. */
6432 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6433
6434 cum->words += words;
6435 if (cum->nregs > 0)
6436 {
6437 cum->nregs -= 1;
6438 cum->regno += 1;
6439 }
6440 }
6441
6442 /* Update the data in CUM to advance over an argument of mode MODE and
6443 data type TYPE. (TYPE is null for libcalls where that information
6444 may not be available.) */
6445
6446 static void
6447 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6448 const_tree type, bool named)
6449 {
6450 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6451 HOST_WIDE_INT bytes, words;
6452
6453 if (mode == BLKmode)
6454 bytes = int_size_in_bytes (type);
6455 else
6456 bytes = GET_MODE_SIZE (mode);
6457 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6458
6459 if (type)
6460 mode = type_natural_mode (type, NULL);
6461
6462 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6463 function_arg_advance_ms_64 (cum, bytes, words);
6464 else if (TARGET_64BIT)
6465 function_arg_advance_64 (cum, mode, type, words, named);
6466 else
6467 function_arg_advance_32 (cum, mode, type, bytes, words);
6468 }
6469
6470 /* Define where to put the arguments to a function.
6471 Value is zero to push the argument on the stack,
6472 or a hard register in which to store the argument.
6473
6474 MODE is the argument's machine mode.
6475 TYPE is the data type of the argument (as a tree).
6476 This is null for libcalls where that information may
6477 not be available.
6478 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6479 the preceding args and about the function being called.
6480 NAMED is nonzero if this argument is a named parameter
6481 (otherwise it is an extra parameter matching an ellipsis). */
6482
6483 static rtx
6484 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6485 enum machine_mode orig_mode, const_tree type,
6486 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6487 {
6488 static bool warnedsse, warnedmmx;
6489
6490 /* Avoid the AL settings for the Unix64 ABI. */
6491 if (mode == VOIDmode)
6492 return constm1_rtx;
6493
6494 switch (mode)
6495 {
6496 default:
6497 break;
6498
6499 case BLKmode:
6500 if (bytes < 0)
6501 break;
6502 /* FALLTHRU */
6503 case DImode:
6504 case SImode:
6505 case HImode:
6506 case QImode:
6507 if (words <= cum->nregs)
6508 {
6509 int regno = cum->regno;
6510
6511 /* Fastcall allocates the first two DWORD (SImode) or
6512 smaller arguments to ECX and EDX if the argument isn't an
6513 aggregate type. */
6514 if (cum->fastcall)
6515 {
6516 if (mode == BLKmode
6517 || mode == DImode
6518 || (type && AGGREGATE_TYPE_P (type)))
6519 break;
6520
6521 /* ECX not EAX is the first allocated register. */
6522 if (regno == AX_REG)
6523 regno = CX_REG;
6524 }
6525 return gen_rtx_REG (mode, regno);
6526 }
6527 break;
6528
6529 case DFmode:
6530 if (cum->float_in_sse < 2)
6531 break;
6532 case SFmode:
6533 if (cum->float_in_sse < 1)
6534 break;
6535 /* FALLTHRU */
6536 case TImode:
6537 /* In 32bit, we pass TImode in xmm registers. */
6538 case V16QImode:
6539 case V8HImode:
6540 case V4SImode:
6541 case V2DImode:
6542 case V4SFmode:
6543 case V2DFmode:
6544 if (!type || !AGGREGATE_TYPE_P (type))
6545 {
6546 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6547 {
6548 warnedsse = true;
6549 warning (0, "SSE vector argument without SSE enabled "
6550 "changes the ABI");
6551 }
6552 if (cum->sse_nregs)
6553 return gen_reg_or_parallel (mode, orig_mode,
6554 cum->sse_regno + FIRST_SSE_REG);
6555 }
6556 break;
6557
6558 case OImode:
6559 /* OImode shouldn't be used directly. */
6560 gcc_unreachable ();
6561
6562 case V8SFmode:
6563 case V8SImode:
6564 case V32QImode:
6565 case V16HImode:
6566 case V4DFmode:
6567 case V4DImode:
6568 if (!type || !AGGREGATE_TYPE_P (type))
6569 {
6570 if (cum->sse_nregs)
6571 return gen_reg_or_parallel (mode, orig_mode,
6572 cum->sse_regno + FIRST_SSE_REG);
6573 }
6574 break;
6575
6576 case V8QImode:
6577 case V4HImode:
6578 case V2SImode:
6579 case V2SFmode:
6580 case V1TImode:
6581 case V1DImode:
6582 if (!type || !AGGREGATE_TYPE_P (type))
6583 {
6584 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6585 {
6586 warnedmmx = true;
6587 warning (0, "MMX vector argument without MMX enabled "
6588 "changes the ABI");
6589 }
6590 if (cum->mmx_nregs)
6591 return gen_reg_or_parallel (mode, orig_mode,
6592 cum->mmx_regno + FIRST_MMX_REG);
6593 }
6594 break;
6595 }
6596
6597 return NULL_RTX;
6598 }
6599
6600 static rtx
6601 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6602 enum machine_mode orig_mode, const_tree type, bool named)
6603 {
6604 /* Handle a hidden AL argument containing the number of registers
6605 for varargs x86-64 functions. */
6606 if (mode == VOIDmode)
6607 return GEN_INT (cum->maybe_vaarg
6608 ? (cum->sse_nregs < 0
6609 ? X86_64_SSE_REGPARM_MAX
6610 : cum->sse_regno)
6611 : -1);
6612
6613 switch (mode)
6614 {
6615 default:
6616 break;
6617
6618 case V8SFmode:
6619 case V8SImode:
6620 case V32QImode:
6621 case V16HImode:
6622 case V4DFmode:
6623 case V4DImode:
6624 /* Unnamed 256bit vector mode parameters are passed on stack. */
6625 if (!named)
6626 return NULL;
6627 break;
6628 }
6629
6630 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6631 cum->sse_nregs,
6632 &x86_64_int_parameter_registers [cum->regno],
6633 cum->sse_regno);
6634 }
6635
6636 static rtx
6637 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6638 enum machine_mode orig_mode, bool named,
6639 HOST_WIDE_INT bytes)
6640 {
6641 unsigned int regno;
6642
6643 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6644 We use the value -2 to specify that the current function call is MS_ABI. */
6645 if (mode == VOIDmode)
6646 return GEN_INT (-2);
6647
6648 /* If we've run out of registers, it goes on the stack. */
6649 if (cum->nregs == 0)
6650 return NULL_RTX;
6651
6652 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6653
6654 /* Only floating point modes are passed in anything but integer regs. */
6655 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6656 {
6657 if (named)
6658 regno = cum->regno + FIRST_SSE_REG;
6659 else
6660 {
6661 rtx t1, t2;
6662
6663 /* Unnamed floating parameters are passed in both the
6664 SSE and integer registers. */
6665 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6666 t2 = gen_rtx_REG (mode, regno);
6667 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6668 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6669 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6670 }
6671 }
6672 /* Handle aggregate types passed in registers. */
6673 if (orig_mode == BLKmode)
6674 {
6675 if (bytes > 0 && bytes <= 8)
6676 mode = (bytes > 4 ? DImode : SImode);
6677 if (mode == BLKmode)
6678 mode = DImode;
6679 }
6680
6681 return gen_reg_or_parallel (mode, orig_mode, regno);
6682 }
6683
6684 /* Return where to put the arguments to a function.
6685 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6686
6687 MODE is the argument's machine mode. TYPE is the data type of the
6688 argument. It is null for libcalls where that information may not be
6689 available. CUM gives information about the preceding args and about
6690 the function being called. NAMED is nonzero if this argument is a
6691 named parameter (otherwise it is an extra parameter matching an
6692 ellipsis). */
6693
6694 static rtx
6695 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6696 const_tree type, bool named)
6697 {
6698 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6699 enum machine_mode mode = omode;
6700 HOST_WIDE_INT bytes, words;
6701 rtx arg;
6702
6703 if (mode == BLKmode)
6704 bytes = int_size_in_bytes (type);
6705 else
6706 bytes = GET_MODE_SIZE (mode);
6707 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6708
6709 /* To simplify the code below, represent vector types with a vector mode
6710 even if MMX/SSE are not active. */
6711 if (type && TREE_CODE (type) == VECTOR_TYPE)
6712 mode = type_natural_mode (type, cum);
6713
6714 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6715 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6716 else if (TARGET_64BIT)
6717 arg = function_arg_64 (cum, mode, omode, type, named);
6718 else
6719 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6720
6721 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6722 {
6723 /* This argument uses 256bit AVX modes. */
6724 if (cum->caller)
6725 cfun->machine->callee_pass_avx256_p = true;
6726 else
6727 cfun->machine->caller_pass_avx256_p = true;
6728 }
6729
6730 return arg;
6731 }
6732
6733 /* A C expression that indicates when an argument must be passed by
6734 reference. If nonzero for an argument, a copy of that argument is
6735 made in memory and a pointer to the argument is passed instead of
6736 the argument itself. The pointer is passed in whatever way is
6737 appropriate for passing a pointer to that type. */
6738
6739 static bool
6740 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6741 enum machine_mode mode ATTRIBUTE_UNUSED,
6742 const_tree type, bool named ATTRIBUTE_UNUSED)
6743 {
6744 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6745
6746 /* See Windows x64 Software Convention. */
6747 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6748 {
6749 int msize = (int) GET_MODE_SIZE (mode);
6750 if (type)
6751 {
6752 /* Arrays are passed by reference. */
6753 if (TREE_CODE (type) == ARRAY_TYPE)
6754 return true;
6755
6756 if (AGGREGATE_TYPE_P (type))
6757 {
6758 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6759 are passed by reference. */
6760 msize = int_size_in_bytes (type);
6761 }
6762 }
6763
6764 /* __m128 is passed by reference. */
6765 switch (msize) {
6766 case 1: case 2: case 4: case 8:
6767 break;
6768 default:
6769 return true;
6770 }
6771 }
6772 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6773 return true;
6774
6775 return false;
6776 }
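
/* For illustration only: under the 64-bit MS ABI

     struct big { char b[12]; };    12 bytes, passed by reference
     __m128 v;                      16 bytes, passed by reference
     struct ok { long long l; };     8 bytes, passed by value

   whereas the SYSV path above only forces indirection for
   variable-sized types.  */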
6777
6778 /* Return true when TYPE should be 128bit aligned for 32bit argument
6779 passing ABI. XXX: This function is obsolete and is only used for
6780 checking psABI compatibility with previous versions of GCC. */
6781
6782 static bool
6783 ix86_compat_aligned_value_p (const_tree type)
6784 {
6785 enum machine_mode mode = TYPE_MODE (type);
6786 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6787 || mode == TDmode
6788 || mode == TFmode
6789 || mode == TCmode)
6790 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6791 return true;
6792 if (TYPE_ALIGN (type) < 128)
6793 return false;
6794
6795 if (AGGREGATE_TYPE_P (type))
6796 {
6797 /* Walk the aggregates recursively. */
6798 switch (TREE_CODE (type))
6799 {
6800 case RECORD_TYPE:
6801 case UNION_TYPE:
6802 case QUAL_UNION_TYPE:
6803 {
6804 tree field;
6805
6806 /* Walk all the structure fields. */
6807 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6808 {
6809 if (TREE_CODE (field) == FIELD_DECL
6810 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6811 return true;
6812 }
6813 break;
6814 }
6815
6816 case ARRAY_TYPE:
6817 /* Just in case some languages pass arrays by value. */
6818 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6819 return true;
6820 break;
6821
6822 default:
6823 gcc_unreachable ();
6824 }
6825 }
6826 return false;
6827 }
6828
6829 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6830 XXX: This function is obsolete and is only used for checking psABI
6831 compatibility with previous versions of GCC. */
6832
6833 static unsigned int
6834 ix86_compat_function_arg_boundary (enum machine_mode mode,
6835 const_tree type, unsigned int align)
6836 {
6837 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6838 natural boundaries. */
6839 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6840 {
6841 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6842 make an exception for SSE modes since these require 128bit
6843 alignment.
6844
6845 The handling here differs from field_alignment. ICC aligns MMX
6846 arguments to 4 byte boundaries, while structure fields are aligned
6847 to 8 byte boundaries. */
6848 if (!type)
6849 {
6850 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6851 align = PARM_BOUNDARY;
6852 }
6853 else
6854 {
6855 if (!ix86_compat_aligned_value_p (type))
6856 align = PARM_BOUNDARY;
6857 }
6858 }
6859 if (align > BIGGEST_ALIGNMENT)
6860 align = BIGGEST_ALIGNMENT;
6861 return align;
6862 }
6863
6864 /* Return true when TYPE should be 128bit aligned for 32bit argument
6865 passing ABI. */
6866
6867 static bool
6868 ix86_contains_aligned_value_p (const_tree type)
6869 {
6870 enum machine_mode mode = TYPE_MODE (type);
6871
6872 if (mode == XFmode || mode == XCmode)
6873 return false;
6874
6875 if (TYPE_ALIGN (type) < 128)
6876 return false;
6877
6878 if (AGGREGATE_TYPE_P (type))
6879 {
6880 /* Walk the aggregates recursively. */
6881 switch (TREE_CODE (type))
6882 {
6883 case RECORD_TYPE:
6884 case UNION_TYPE:
6885 case QUAL_UNION_TYPE:
6886 {
6887 tree field;
6888
6889 /* Walk all the structure fields. */
6890 for (field = TYPE_FIELDS (type);
6891 field;
6892 field = DECL_CHAIN (field))
6893 {
6894 if (TREE_CODE (field) == FIELD_DECL
6895 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
6896 return true;
6897 }
6898 break;
6899 }
6900
6901 case ARRAY_TYPE:
6902 /* Just in case some languages pass arrays by value. */
6903 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
6904 return true;
6905 break;
6906
6907 default:
6908 gcc_unreachable ();
6909 }
6910 }
6911 else
6912 return TYPE_ALIGN (type) >= 128;
6913
6914 return false;
6915 }
6916
6917 /* Gives the alignment boundary, in bits, of an argument with the
6918 specified mode and type. */
6919
6920 static unsigned int
6921 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6922 {
6923 unsigned int align;
6924 if (type)
6925 {
6926 /* Since the main variant type is used for the call, convert TYPE to
6927 its main variant. */
6928 type = TYPE_MAIN_VARIANT (type);
6929 align = TYPE_ALIGN (type);
6930 }
6931 else
6932 align = GET_MODE_ALIGNMENT (mode);
6933 if (align < PARM_BOUNDARY)
6934 align = PARM_BOUNDARY;
6935 else
6936 {
6937 static bool warned;
6938 unsigned int saved_align = align;
6939
6940 if (!TARGET_64BIT)
6941 {
6942 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
6943 if (!type)
6944 {
6945 if (mode == XFmode || mode == XCmode)
6946 align = PARM_BOUNDARY;
6947 }
6948 else if (!ix86_contains_aligned_value_p (type))
6949 align = PARM_BOUNDARY;
6950
6951 if (align < 128)
6952 align = PARM_BOUNDARY;
6953 }
6954
6955 if (warn_psabi
6956 && !warned
6957 && align != ix86_compat_function_arg_boundary (mode, type,
6958 saved_align))
6959 {
6960 warned = true;
6961 inform (input_location,
6962 "The ABI for passing parameters with %d-byte"
6963 " alignment has changed in GCC 4.6",
6964 align / BITS_PER_UNIT);
6965 }
6966 }
6967
6968 return align;
6969 }
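/* For example (a sketch, assuming typical type alignments): with the
   32-bit ABI a plain double argument ends up with PARM_BOUNDARY
   alignment, while a type containing a 128-bit aligned value such as
   __m128 keeps its 128-bit alignment.  The note guarded by warn_psabi
   above fires exactly when this result differs from what
   ix86_compat_function_arg_boundary would have returned.  */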
6970
6971 /* Return true if N is a possible register number of function value. */
6972
6973 static bool
6974 ix86_function_value_regno_p (const unsigned int regno)
6975 {
6976 switch (regno)
6977 {
6978 case 0:
6979 return true;
6980
6981 case FIRST_FLOAT_REG:
6982 /* TODO: The function should depend on current function ABI but
6983 builtins.c would need updating then. Therefore we use the
6984 default ABI. */
6985 if (TARGET_64BIT && ix86_abi == MS_ABI)
6986 return false;
6987 return TARGET_FLOAT_RETURNS_IN_80387;
6988
6989 case FIRST_SSE_REG:
6990 return TARGET_SSE;
6991
6992 case FIRST_MMX_REG:
6993 if (TARGET_MACHO || TARGET_64BIT)
6994 return false;
6995 return TARGET_MMX;
6996 }
6997
6998 return false;
6999 }
7000
7001 /* Define how to find the value returned by a function.
7002 VALTYPE is the data type of the value (as a tree).
7003 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7004 otherwise, FUNC is 0. */
7005
7006 static rtx
7007 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7008 const_tree fntype, const_tree fn)
7009 {
7010 unsigned int regno;
7011
7012 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7013 we normally prevent this case when mmx is not available. However
7014 some ABIs may require the result to be returned like DImode. */
7015 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7016 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7017
7018 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7019 we prevent this case when sse is not available. However some ABIs
7020 may require the result to be returned like integer TImode. */
7021 else if (mode == TImode
7022 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7023 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7024
7025 /* 32-byte vector modes in %ymm0. */
7026 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7027 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7028
7029 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7030 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7031 regno = FIRST_FLOAT_REG;
7032 else
7033 /* Most things go in %eax. */
7034 regno = AX_REG;
7035
7036 /* Override FP return register with %xmm0 for local functions when
7037 SSE math is enabled or for functions with sseregparm attribute. */
7038 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7039 {
7040 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7041 if ((sse_level >= 1 && mode == SFmode)
7042 || (sse_level == 2 && mode == DFmode))
7043 regno = FIRST_SSE_REG;
7044 }
7045
7046 /* OImode shouldn't be used directly. */
7047 gcc_assert (mode != OImode);
7048
7049 return gen_rtx_REG (orig_mode, regno);
7050 }
7051
7052 static rtx
7053 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7054 const_tree valtype)
7055 {
7056 rtx ret;
7057
7058 /* Handle libcalls, which don't provide a type node. */
7059 if (valtype == NULL)
7060 {
7061 switch (mode)
7062 {
7063 case SFmode:
7064 case SCmode:
7065 case DFmode:
7066 case DCmode:
7067 case TFmode:
7068 case SDmode:
7069 case DDmode:
7070 case TDmode:
7071 return gen_rtx_REG (mode, FIRST_SSE_REG);
7072 case XFmode:
7073 case XCmode:
7074 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7075 case TCmode:
7076 return NULL;
7077 default:
7078 return gen_rtx_REG (mode, AX_REG);
7079 }
7080 }
7081 else if (POINTER_TYPE_P (valtype))
7082 {
7083 /* Pointers are always returned in Pmode. */
7084 mode = Pmode;
7085 }
7086
7087 ret = construct_container (mode, orig_mode, valtype, 1,
7088 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7089 x86_64_int_return_registers, 0);
7090
7091 /* For zero-sized structures, construct_container returns NULL, but we
7092 need to keep the rest of the compiler happy by returning a meaningful value. */
7093 if (!ret)
7094 ret = gen_rtx_REG (orig_mode, AX_REG);
7095
7096 return ret;
7097 }
7098
7099 static rtx
7100 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7101 {
7102 unsigned int regno = AX_REG;
7103
7104 if (TARGET_SSE)
7105 {
7106 switch (GET_MODE_SIZE (mode))
7107 {
7108 case 16:
7109 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7110 && !COMPLEX_MODE_P (mode))
7111 regno = FIRST_SSE_REG;
7112 break;
7113 case 8:
7114 case 4:
7115 if (mode == SFmode || mode == DFmode)
7116 regno = FIRST_SSE_REG;
7117 break;
7118 default:
7119 break;
7120 }
7121 }
7122 return gen_rtx_REG (orig_mode, regno);
7123 }
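/* Roughly, the switch above means: integers and other 1/2/4/8-byte
   values come back in %rax, float/double and 16-byte non-complex
   scalar or vector modes in %xmm0 (a sketch, assuming TARGET_SSE).  */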
7124
7125 static rtx
7126 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7127 enum machine_mode orig_mode, enum machine_mode mode)
7128 {
7129 const_tree fn, fntype;
7130
7131 fn = NULL_TREE;
7132 if (fntype_or_decl && DECL_P (fntype_or_decl))
7133 fn = fntype_or_decl;
7134 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7135
7136 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7137 return function_value_ms_64 (orig_mode, mode);
7138 else if (TARGET_64BIT)
7139 return function_value_64 (orig_mode, mode, valtype);
7140 else
7141 return function_value_32 (orig_mode, mode, fntype, fn);
7142 }
7143
7144 static rtx
7145 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7146 bool outgoing ATTRIBUTE_UNUSED)
7147 {
7148 enum machine_mode mode, orig_mode;
7149
7150 orig_mode = TYPE_MODE (valtype);
7151 mode = type_natural_mode (valtype, NULL);
7152 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7153 }
7154
7155 /* Pointer function arguments and return values are promoted to Pmode. */
7156
7157 static enum machine_mode
7158 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7159 int *punsignedp, const_tree fntype,
7160 int for_return)
7161 {
7162 if (type != NULL_TREE && POINTER_TYPE_P (type))
7163 {
7164 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7165 return Pmode;
7166 }
7167 return default_promote_function_mode (type, mode, punsignedp, fntype,
7168 for_return);
7169 }
7170
7171 rtx
7172 ix86_libcall_value (enum machine_mode mode)
7173 {
7174 return ix86_function_value_1 (NULL, NULL, mode, mode);
7175 }
7176
7177 /* Return true iff type is returned in memory. */
7178
7179 static bool ATTRIBUTE_UNUSED
7180 return_in_memory_32 (const_tree type, enum machine_mode mode)
7181 {
7182 HOST_WIDE_INT size;
7183
7184 if (mode == BLKmode)
7185 return true;
7186
7187 size = int_size_in_bytes (type);
7188
7189 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7190 return false;
7191
7192 if (VECTOR_MODE_P (mode) || mode == TImode)
7193 {
7194 /* User-created vectors small enough to fit in EAX. */
7195 if (size < 8)
7196 return false;
7197
7198 /* MMX/3dNow values are returned in MM0,
7199 except when it doesn't exist or the ABI prescribes otherwise. */
7200 if (size == 8)
7201 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7202
7203 /* SSE values are returned in XMM0, except when it doesn't exist. */
7204 if (size == 16)
7205 return !TARGET_SSE;
7206
7207 /* AVX values are returned in YMM0, except when it doesn't exist. */
7208 if (size == 32)
7209 return !TARGET_AVX;
7210 }
7211
7212 if (mode == XFmode)
7213 return false;
7214
7215 if (size > 12)
7216 return true;
7217
7218 /* OImode shouldn't be used directly. */
7219 gcc_assert (mode != OImode);
7220
7221 return false;
7222 }
7223
7224 static bool ATTRIBUTE_UNUSED
7225 return_in_memory_64 (const_tree type, enum machine_mode mode)
7226 {
7227 int needed_intregs, needed_sseregs;
7228 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7229 }
7230
7231 static bool ATTRIBUTE_UNUSED
7232 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7233 {
7234 HOST_WIDE_INT size = int_size_in_bytes (type);
7235
7236 /* __m128 is returned in xmm0. */
7237 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7238 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7239 return false;
7240
7241 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7242 return size != 1 && size != 2 && size != 4 && size != 8;
7243 }
7244
7245 static bool
7246 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7247 {
7248 #ifdef SUBTARGET_RETURN_IN_MEMORY
7249 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7250 #else
7251 const enum machine_mode mode = type_natural_mode (type, NULL);
7252
7253 if (TARGET_64BIT)
7254 {
7255 if (ix86_function_type_abi (fntype) == MS_ABI)
7256 return return_in_memory_ms_64 (type, mode);
7257 else
7258 return return_in_memory_64 (type, mode);
7259 }
7260 else
7261 return return_in_memory_32 (type, mode);
7262 #endif
7263 }
7264
7265 /* When returning SSE vector types, we have a choice of either
7266 (1) being abi incompatible with a -march switch, or
7267 (2) generating an error.
7268 Given no good solution, I think the safest thing is one warning.
7269 The user won't be able to use -Werror, but....
7270
7271 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7272 called in response to actually generating a caller or callee that
7273 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7274 via aggregate_value_p for general type probing from tree-ssa. */
7275
7276 static rtx
7277 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7278 {
7279 static bool warnedsse, warnedmmx;
7280
7281 if (!TARGET_64BIT && type)
7282 {
7283 /* Look at the return type of the function, not the function type. */
7284 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7285
7286 if (!TARGET_SSE && !warnedsse)
7287 {
7288 if (mode == TImode
7289 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7290 {
7291 warnedsse = true;
7292 warning (0, "SSE vector return without SSE enabled "
7293 "changes the ABI");
7294 }
7295 }
7296
7297 if (!TARGET_MMX && !warnedmmx)
7298 {
7299 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7300 {
7301 warnedmmx = true;
7302 warning (0, "MMX vector return without MMX enabled "
7303 "changes the ABI");
7304 }
7305 }
7306 }
7307
7308 return NULL;
7309 }
7310
7311 \f
7312 /* Create the va_list data type. */
7313
7314 /* Returns the calling-convention-specific va_list data type.
7315 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7316
7317 static tree
7318 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7319 {
7320 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7321
7322 /* For i386 we use a plain pointer to the argument area. */
7323 if (!TARGET_64BIT || abi == MS_ABI)
7324 return build_pointer_type (char_type_node);
7325
7326 record = lang_hooks.types.make_type (RECORD_TYPE);
7327 type_decl = build_decl (BUILTINS_LOCATION,
7328 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7329
7330 f_gpr = build_decl (BUILTINS_LOCATION,
7331 FIELD_DECL, get_identifier ("gp_offset"),
7332 unsigned_type_node);
7333 f_fpr = build_decl (BUILTINS_LOCATION,
7334 FIELD_DECL, get_identifier ("fp_offset"),
7335 unsigned_type_node);
7336 f_ovf = build_decl (BUILTINS_LOCATION,
7337 FIELD_DECL, get_identifier ("overflow_arg_area"),
7338 ptr_type_node);
7339 f_sav = build_decl (BUILTINS_LOCATION,
7340 FIELD_DECL, get_identifier ("reg_save_area"),
7341 ptr_type_node);
7342
7343 va_list_gpr_counter_field = f_gpr;
7344 va_list_fpr_counter_field = f_fpr;
7345
7346 DECL_FIELD_CONTEXT (f_gpr) = record;
7347 DECL_FIELD_CONTEXT (f_fpr) = record;
7348 DECL_FIELD_CONTEXT (f_ovf) = record;
7349 DECL_FIELD_CONTEXT (f_sav) = record;
7350
7351 TYPE_STUB_DECL (record) = type_decl;
7352 TYPE_NAME (record) = type_decl;
7353 TYPE_FIELDS (record) = f_gpr;
7354 DECL_CHAIN (f_gpr) = f_fpr;
7355 DECL_CHAIN (f_fpr) = f_ovf;
7356 DECL_CHAIN (f_ovf) = f_sav;
7357
7358 layout_type (record);
7359
7360 /* The correct type is an array type of one element. */
7361 return build_array_type (record, build_index_type (size_zero_node));
7362 }
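/* A minimal sketch of the C-level equivalent of the record built above
   for the 64-bit SysV case (field names match the FIELD_DECLs created;
   the trailing annotations are informal):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;      -- next GPR slot offset in reg_save_area
       unsigned int fp_offset;      -- next SSE slot offset in reg_save_area
       void *overflow_arg_area;     -- next stack-passed argument
       void *reg_save_area;         -- base of the register save area
     } __builtin_va_list[1];        -- array of one element, as returned  */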
7363
7364 /* Set up the builtin va_list data type and, for 64-bit, the additional
7365 calling-convention-specific va_list data types. */
7366
7367 static tree
7368 ix86_build_builtin_va_list (void)
7369 {
7370 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7371
7372 /* Initialize abi specific va_list builtin types. */
7373 if (TARGET_64BIT)
7374 {
7375 tree t;
7376 if (ix86_abi == MS_ABI)
7377 {
7378 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7379 if (TREE_CODE (t) != RECORD_TYPE)
7380 t = build_variant_type_copy (t);
7381 sysv_va_list_type_node = t;
7382 }
7383 else
7384 {
7385 t = ret;
7386 if (TREE_CODE (t) != RECORD_TYPE)
7387 t = build_variant_type_copy (t);
7388 sysv_va_list_type_node = t;
7389 }
7390 if (ix86_abi != MS_ABI)
7391 {
7392 t = ix86_build_builtin_va_list_abi (MS_ABI);
7393 if (TREE_CODE (t) != RECORD_TYPE)
7394 t = build_variant_type_copy (t);
7395 ms_va_list_type_node = t;
7396 }
7397 else
7398 {
7399 t = ret;
7400 if (TREE_CODE (t) != RECORD_TYPE)
7401 t = build_variant_type_copy (t);
7402 ms_va_list_type_node = t;
7403 }
7404 }
7405
7406 return ret;
7407 }
7408
7409 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7410
7411 static void
7412 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7413 {
7414 rtx save_area, mem;
7415 alias_set_type set;
7416 int i, max;
7417
7418 /* GPR size of varargs save area. */
7419 if (cfun->va_list_gpr_size)
7420 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7421 else
7422 ix86_varargs_gpr_size = 0;
7423
7424 /* FPR size of varargs save area. We don't need it if we don't pass
7425 anything in SSE registers. */
7426 if (TARGET_SSE && cfun->va_list_fpr_size)
7427 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7428 else
7429 ix86_varargs_fpr_size = 0;
7430
7431 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7432 return;
7433
7434 save_area = frame_pointer_rtx;
7435 set = get_varargs_alias_set ();
7436
7437 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7438 if (max > X86_64_REGPARM_MAX)
7439 max = X86_64_REGPARM_MAX;
7440
7441 for (i = cum->regno; i < max; i++)
7442 {
7443 mem = gen_rtx_MEM (Pmode,
7444 plus_constant (save_area, i * UNITS_PER_WORD));
7445 MEM_NOTRAP_P (mem) = 1;
7446 set_mem_alias_set (mem, set);
7447 emit_move_insn (mem, gen_rtx_REG (Pmode,
7448 x86_64_int_parameter_registers[i]));
7449 }
7450
7451 if (ix86_varargs_fpr_size)
7452 {
7453 enum machine_mode smode;
7454 rtx label, test;
7455
7456 /* Now emit code to save SSE registers. The AX parameter contains number
7457 of SSE parameter registers used to call this function, though all we
7458 actually check here is the zero/non-zero status. */
7459
7460 label = gen_label_rtx ();
7461 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7462 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7463 label));
7464
7465 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7466 we used movdqa (i.e. TImode) instead? Perhaps even better would
7467 be if we could determine the real mode of the data, via a hook
7468 into pass_stdarg. Ignore all that for now. */
7469 smode = V4SFmode;
7470 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7471 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7472
7473 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7474 if (max > X86_64_SSE_REGPARM_MAX)
7475 max = X86_64_SSE_REGPARM_MAX;
7476
7477 for (i = cum->sse_regno; i < max; ++i)
7478 {
7479 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7480 mem = gen_rtx_MEM (smode, mem);
7481 MEM_NOTRAP_P (mem) = 1;
7482 set_mem_alias_set (mem, set);
7483 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7484
7485 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7486 }
7487
7488 emit_label (label);
7489 }
7490 }
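/* The varargs save area laid out above therefore looks like this (a
   sketch; either half may be absent depending on cfun->va_list_gpr_size
   and cfun->va_list_fpr_size):

     frame_pointer + i * UNITS_PER_WORD                GPR number i
     frame_pointer + ix86_varargs_gpr_size + i * 16    SSE register i

   ix86_va_start below exposes this block through reg_save_area.  */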
7491
7492 static void
7493 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7494 {
7495 alias_set_type set = get_varargs_alias_set ();
7496 int i;
7497
7498 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7499 {
7500 rtx reg, mem;
7501
7502 mem = gen_rtx_MEM (Pmode,
7503 plus_constant (virtual_incoming_args_rtx,
7504 i * UNITS_PER_WORD));
7505 MEM_NOTRAP_P (mem) = 1;
7506 set_mem_alias_set (mem, set);
7507
7508 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7509 emit_move_insn (mem, reg);
7510 }
7511 }
7512
7513 static void
7514 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7515 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7516 int no_rtl)
7517 {
7518 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7519 CUMULATIVE_ARGS next_cum;
7520 tree fntype;
7521
7522 /* This argument doesn't appear to be used anymore, which is good,
7523 because the old code here didn't suppress rtl generation. */
7524 gcc_assert (!no_rtl);
7525
7526 if (!TARGET_64BIT)
7527 return;
7528
7529 fntype = TREE_TYPE (current_function_decl);
7530
7531 /* For varargs, we do not want to skip the dummy va_dcl argument.
7532 For stdargs, we do want to skip the last named argument. */
7533 next_cum = *cum;
7534 if (stdarg_p (fntype))
7535 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7536 true);
7537
7538 if (cum->call_abi == MS_ABI)
7539 setup_incoming_varargs_ms_64 (&next_cum);
7540 else
7541 setup_incoming_varargs_64 (&next_cum);
7542 }
7543
7544 /* Checks if TYPE is of kind va_list char *. */
7545
7546 static bool
7547 is_va_list_char_pointer (tree type)
7548 {
7549 tree canonic;
7550
7551 /* For 32-bit it is always true. */
7552 if (!TARGET_64BIT)
7553 return true;
7554 canonic = ix86_canonical_va_list_type (type);
7555 return (canonic == ms_va_list_type_node
7556 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7557 }
7558
7559 /* Implement va_start. */
7560
7561 static void
7562 ix86_va_start (tree valist, rtx nextarg)
7563 {
7564 HOST_WIDE_INT words, n_gpr, n_fpr;
7565 tree f_gpr, f_fpr, f_ovf, f_sav;
7566 tree gpr, fpr, ovf, sav, t;
7567 tree type;
7568 rtx ovf_rtx;
7569
7570 if (flag_split_stack
7571 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7572 {
7573 unsigned int scratch_regno;
7574
7575 /* When we are splitting the stack, we can't refer to the stack
7576 arguments using internal_arg_pointer, because they may be on
7577 the old stack. The split stack prologue will arrange to
7578 leave a pointer to the old stack arguments in a scratch
7579 register, which we here copy to a pseudo-register. The split
7580 stack prologue can't set the pseudo-register directly because
7581 it (the prologue) runs before any registers have been saved. */
7582
7583 scratch_regno = split_stack_prologue_scratch_regno ();
7584 if (scratch_regno != INVALID_REGNUM)
7585 {
7586 rtx reg, seq;
7587
7588 reg = gen_reg_rtx (Pmode);
7589 cfun->machine->split_stack_varargs_pointer = reg;
7590
7591 start_sequence ();
7592 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7593 seq = get_insns ();
7594 end_sequence ();
7595
7596 push_topmost_sequence ();
7597 emit_insn_after (seq, entry_of_function ());
7598 pop_topmost_sequence ();
7599 }
7600 }
7601
7602 /* Only 64bit target needs something special. */
7603 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7604 {
7605 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7606 std_expand_builtin_va_start (valist, nextarg);
7607 else
7608 {
7609 rtx va_r, next;
7610
7611 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7612 next = expand_binop (ptr_mode, add_optab,
7613 cfun->machine->split_stack_varargs_pointer,
7614 crtl->args.arg_offset_rtx,
7615 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7616 convert_move (va_r, next, 0);
7617 }
7618 return;
7619 }
7620
7621 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7622 f_fpr = DECL_CHAIN (f_gpr);
7623 f_ovf = DECL_CHAIN (f_fpr);
7624 f_sav = DECL_CHAIN (f_ovf);
7625
7626 valist = build_simple_mem_ref (valist);
7627 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7628 /* The following should be folded into the MEM_REF offset. */
7629 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7630 f_gpr, NULL_TREE);
7631 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7632 f_fpr, NULL_TREE);
7633 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7634 f_ovf, NULL_TREE);
7635 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7636 f_sav, NULL_TREE);
7637
7638 /* Count number of gp and fp argument registers used. */
7639 words = crtl->args.info.words;
7640 n_gpr = crtl->args.info.regno;
7641 n_fpr = crtl->args.info.sse_regno;
7642
7643 if (cfun->va_list_gpr_size)
7644 {
7645 type = TREE_TYPE (gpr);
7646 t = build2 (MODIFY_EXPR, type,
7647 gpr, build_int_cst (type, n_gpr * 8));
7648 TREE_SIDE_EFFECTS (t) = 1;
7649 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7650 }
7651
7652 if (TARGET_SSE && cfun->va_list_fpr_size)
7653 {
7654 type = TREE_TYPE (fpr);
7655 t = build2 (MODIFY_EXPR, type, fpr,
7656 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7657 TREE_SIDE_EFFECTS (t) = 1;
7658 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7659 }
7660
7661 /* Find the overflow area. */
7662 type = TREE_TYPE (ovf);
7663 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7664 ovf_rtx = crtl->args.internal_arg_pointer;
7665 else
7666 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7667 t = make_tree (type, ovf_rtx);
7668 if (words != 0)
7669 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7670 t = build2 (MODIFY_EXPR, type, ovf, t);
7671 TREE_SIDE_EFFECTS (t) = 1;
7672 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7673
7674 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7675 {
7676 /* Find the register save area.
7677 The function prologue saves it right above the stack frame. */
7678 type = TREE_TYPE (sav);
7679 t = make_tree (type, frame_pointer_rtx);
7680 if (!ix86_varargs_gpr_size)
7681 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7682 t = build2 (MODIFY_EXPR, type, sav, t);
7683 TREE_SIDE_EFFECTS (t) = 1;
7684 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7685 }
7686 }
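/* After the assignments above, for a function whose named arguments
   consumed N_GPR integer registers and N_FPR SSE registers, the
   va_list holds (a sketch of the values just stored):

     gp_offset         = N_GPR * 8
     fp_offset         = X86_64_REGPARM_MAX * 8 + N_FPR * 16
     overflow_arg_area = address of the first stack-passed argument
     reg_save_area     = the varargs register save area  */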
7687
7688 /* Implement va_arg. */
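/* A rough C-level sketch of what the gimplification below produces for
   an argument that may live in registers (names refer to the va_list
   fields; the bounds are simplified):

     if (gp_offset >= (X86_64_REGPARM_MAX - needed_intregs + 1) * 8
         || fp_offset >= the corresponding SSE bound)
       goto from_stack;
     addr = reg_save_area + gp_offset  (or + fp_offset for SSE pieces);
     gp_offset += needed_intregs * 8;
     fp_offset += needed_sseregs * 16;
     goto done;
   from_stack:
     addr = overflow_arg_area aligned to arg_boundary;
     overflow_arg_area = addr + rsize * UNITS_PER_WORD;
   done:
     result = *(TYPE *) addr;  */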
7689
7690 static tree
7691 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7692 gimple_seq *post_p)
7693 {
7694 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7695 tree f_gpr, f_fpr, f_ovf, f_sav;
7696 tree gpr, fpr, ovf, sav, t;
7697 int size, rsize;
7698 tree lab_false, lab_over = NULL_TREE;
7699 tree addr, t2;
7700 rtx container;
7701 int indirect_p = 0;
7702 tree ptrtype;
7703 enum machine_mode nat_mode;
7704 unsigned int arg_boundary;
7705
7706 /* Only 64bit target needs something special. */
7707 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7708 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7709
7710 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7711 f_fpr = DECL_CHAIN (f_gpr);
7712 f_ovf = DECL_CHAIN (f_fpr);
7713 f_sav = DECL_CHAIN (f_ovf);
7714
7715 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7716 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7717 valist = build_va_arg_indirect_ref (valist);
7718 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7719 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7720 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7721
7722 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7723 if (indirect_p)
7724 type = build_pointer_type (type);
7725 size = int_size_in_bytes (type);
7726 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7727
7728 nat_mode = type_natural_mode (type, NULL);
7729 switch (nat_mode)
7730 {
7731 case V8SFmode:
7732 case V8SImode:
7733 case V32QImode:
7734 case V16HImode:
7735 case V4DFmode:
7736 case V4DImode:
7737 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7738 if (!TARGET_64BIT_MS_ABI)
7739 {
7740 container = NULL;
7741 break;
7742 }
7743
7744 default:
7745 container = construct_container (nat_mode, TYPE_MODE (type),
7746 type, 0, X86_64_REGPARM_MAX,
7747 X86_64_SSE_REGPARM_MAX, intreg,
7748 0);
7749 break;
7750 }
7751
7752 /* Pull the value out of the saved registers. */
7753
7754 addr = create_tmp_var (ptr_type_node, "addr");
7755
7756 if (container)
7757 {
7758 int needed_intregs, needed_sseregs;
7759 bool need_temp;
7760 tree int_addr, sse_addr;
7761
7762 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7763 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7764
7765 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7766
7767 need_temp = (!REG_P (container)
7768 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7769 || TYPE_ALIGN (type) > 128));
7770
7771 /* When passing a structure, verify that it is a consecutive block
7772 in the register save area. If not, we need to do moves. */
7773 if (!need_temp && !REG_P (container))
7774 {
7775 /* Verify that all registers are strictly consecutive */
7776 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7777 {
7778 int i;
7779
7780 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7781 {
7782 rtx slot = XVECEXP (container, 0, i);
7783 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7784 || INTVAL (XEXP (slot, 1)) != i * 16)
7785 need_temp = 1;
7786 }
7787 }
7788 else
7789 {
7790 int i;
7791
7792 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7793 {
7794 rtx slot = XVECEXP (container, 0, i);
7795 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7796 || INTVAL (XEXP (slot, 1)) != i * 8)
7797 need_temp = 1;
7798 }
7799 }
7800 }
7801 if (!need_temp)
7802 {
7803 int_addr = addr;
7804 sse_addr = addr;
7805 }
7806 else
7807 {
7808 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7809 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7810 }
7811
7812 /* First ensure that we fit completely in registers. */
7813 if (needed_intregs)
7814 {
7815 t = build_int_cst (TREE_TYPE (gpr),
7816 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7817 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7818 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7819 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7820 gimplify_and_add (t, pre_p);
7821 }
7822 if (needed_sseregs)
7823 {
7824 t = build_int_cst (TREE_TYPE (fpr),
7825 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7826 + X86_64_REGPARM_MAX * 8);
7827 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7828 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7829 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7830 gimplify_and_add (t, pre_p);
7831 }
7832
7833 /* Compute index to start of area used for integer regs. */
7834 if (needed_intregs)
7835 {
7836 /* int_addr = gpr + sav; */
7837 t = fold_build_pointer_plus (sav, gpr);
7838 gimplify_assign (int_addr, t, pre_p);
7839 }
7840 if (needed_sseregs)
7841 {
7842 /* sse_addr = fpr + sav; */
7843 t = fold_build_pointer_plus (sav, fpr);
7844 gimplify_assign (sse_addr, t, pre_p);
7845 }
7846 if (need_temp)
7847 {
7848 int i, prev_size = 0;
7849 tree temp = create_tmp_var (type, "va_arg_tmp");
7850
7851 /* addr = &temp; */
7852 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7853 gimplify_assign (addr, t, pre_p);
7854
7855 for (i = 0; i < XVECLEN (container, 0); i++)
7856 {
7857 rtx slot = XVECEXP (container, 0, i);
7858 rtx reg = XEXP (slot, 0);
7859 enum machine_mode mode = GET_MODE (reg);
7860 tree piece_type;
7861 tree addr_type;
7862 tree daddr_type;
7863 tree src_addr, src;
7864 int src_offset;
7865 tree dest_addr, dest;
7866 int cur_size = GET_MODE_SIZE (mode);
7867
7868 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7869 prev_size = INTVAL (XEXP (slot, 1));
7870 if (prev_size + cur_size > size)
7871 {
7872 cur_size = size - prev_size;
7873 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7874 if (mode == BLKmode)
7875 mode = QImode;
7876 }
7877 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7878 if (mode == GET_MODE (reg))
7879 addr_type = build_pointer_type (piece_type);
7880 else
7881 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7882 true);
7883 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7884 true);
7885
7886 if (SSE_REGNO_P (REGNO (reg)))
7887 {
7888 src_addr = sse_addr;
7889 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7890 }
7891 else
7892 {
7893 src_addr = int_addr;
7894 src_offset = REGNO (reg) * 8;
7895 }
7896 src_addr = fold_convert (addr_type, src_addr);
7897 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
7898
7899 dest_addr = fold_convert (daddr_type, addr);
7900 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
7901 if (cur_size == GET_MODE_SIZE (mode))
7902 {
7903 src = build_va_arg_indirect_ref (src_addr);
7904 dest = build_va_arg_indirect_ref (dest_addr);
7905
7906 gimplify_assign (dest, src, pre_p);
7907 }
7908 else
7909 {
7910 tree copy
7911 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7912 3, dest_addr, src_addr,
7913 size_int (cur_size));
7914 gimplify_and_add (copy, pre_p);
7915 }
7916 prev_size += cur_size;
7917 }
7918 }
7919
7920 if (needed_intregs)
7921 {
7922 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7923 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7924 gimplify_assign (gpr, t, pre_p);
7925 }
7926
7927 if (needed_sseregs)
7928 {
7929 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7930 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7931 gimplify_assign (fpr, t, pre_p);
7932 }
7933
7934 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7935
7936 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7937 }
7938
7939 /* ... otherwise out of the overflow area. */
7940
7941 /* When the caller aligns a parameter on the stack, an alignment
7942 beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
7943 MAX_SUPPORTED_STACK_ALIGNMENT. We will match the callee here with the
7944 caller. */
7945 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
7946 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7947 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7948
7949 /* Care for on-stack alignment if needed. */
7950 if (arg_boundary <= 64 || size == 0)
7951 t = ovf;
7952 else
7953 {
7954 HOST_WIDE_INT align = arg_boundary / 8;
7955 t = fold_build_pointer_plus_hwi (ovf, align - 1);
7956 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7957 build_int_cst (TREE_TYPE (t), -align));
7958 }
7959
7960 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7961 gimplify_assign (addr, t, pre_p);
7962
7963 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
7964 gimplify_assign (unshare_expr (ovf), t, pre_p);
7965
7966 if (container)
7967 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7968
7969 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7970 addr = fold_convert (ptrtype, addr);
7971
7972 if (indirect_p)
7973 addr = build_va_arg_indirect_ref (addr);
7974 return build_va_arg_indirect_ref (addr);
7975 }
7976 \f
7977 /* Return true if OPNUM's MEM should be matched
7978 in movabs* patterns. */
7979
7980 bool
7981 ix86_check_movabs (rtx insn, int opnum)
7982 {
7983 rtx set, mem;
7984
7985 set = PATTERN (insn);
7986 if (GET_CODE (set) == PARALLEL)
7987 set = XVECEXP (set, 0, 0);
7988 gcc_assert (GET_CODE (set) == SET);
7989 mem = XEXP (set, opnum);
7990 while (GET_CODE (mem) == SUBREG)
7991 mem = SUBREG_REG (mem);
7992 gcc_assert (MEM_P (mem));
7993 return volatile_ok || !MEM_VOLATILE_P (mem);
7994 }
7995 \f
7996 /* Initialize the table of extra 80387 mathematical constants. */
7997
7998 static void
7999 init_ext_80387_constants (void)
8000 {
8001 static const char * cst[5] =
8002 {
8003 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8004 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8005 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8006 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8007 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8008 };
8009 int i;
8010
8011 for (i = 0; i < 5; i++)
8012 {
8013 real_from_string (&ext_80387_constants_table[i], cst[i]);
8014 /* Ensure each constant is rounded to XFmode precision. */
8015 real_convert (&ext_80387_constants_table[i],
8016 XFmode, &ext_80387_constants_table[i]);
8017 }
8018
8019 ext_80387_constants_init = 1;
8020 }
8021
8022 /* Return non-zero if the constant is something that
8023 can be loaded with a special instruction. */
8024
8025 int
8026 standard_80387_constant_p (rtx x)
8027 {
8028 enum machine_mode mode = GET_MODE (x);
8029
8030 REAL_VALUE_TYPE r;
8031
8032 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8033 return -1;
8034
8035 if (x == CONST0_RTX (mode))
8036 return 1;
8037 if (x == CONST1_RTX (mode))
8038 return 2;
8039
8040 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8041
8042 /* For XFmode constants, try to find a special 80387 instruction when
8043 optimizing for size or on those CPUs that benefit from them. */
8044 if (mode == XFmode
8045 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8046 {
8047 int i;
8048
8049 if (! ext_80387_constants_init)
8050 init_ext_80387_constants ();
8051
8052 for (i = 0; i < 5; i++)
8053 if (real_identical (&r, &ext_80387_constants_table[i]))
8054 return i + 3;
8055 }
8056
8057 /* A load of the constant -0.0 or -1.0 will be split into an
8058 fldz;fchs or fld1;fchs sequence. */
8059 if (real_isnegzero (&r))
8060 return 8;
8061 if (real_identical (&r, &dconstm1))
8062 return 9;
8063
8064 return 0;
8065 }
8066
8067 /* Return the opcode of the special instruction to be used to load
8068 the constant X. */
8069
8070 const char *
8071 standard_80387_constant_opcode (rtx x)
8072 {
8073 switch (standard_80387_constant_p (x))
8074 {
8075 case 1:
8076 return "fldz";
8077 case 2:
8078 return "fld1";
8079 case 3:
8080 return "fldlg2";
8081 case 4:
8082 return "fldln2";
8083 case 5:
8084 return "fldl2e";
8085 case 6:
8086 return "fldl2t";
8087 case 7:
8088 return "fldpi";
8089 case 8:
8090 case 9:
8091 return "#";
8092 default:
8093 gcc_unreachable ();
8094 }
8095 }
8096
8097 /* Return the CONST_DOUBLE representing the 80387 constant that is
8098 loaded by the specified special instruction. The argument IDX
8099 matches the return value from standard_80387_constant_p. */
8100
8101 rtx
8102 standard_80387_constant_rtx (int idx)
8103 {
8104 int i;
8105
8106 if (! ext_80387_constants_init)
8107 init_ext_80387_constants ();
8108
8109 switch (idx)
8110 {
8111 case 3:
8112 case 4:
8113 case 5:
8114 case 6:
8115 case 7:
8116 i = idx - 3;
8117 break;
8118
8119 default:
8120 gcc_unreachable ();
8121 }
8122
8123 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8124 XFmode);
8125 }
8126
8127 /* Return 1 if X is all 0s and 2 if X is all 1s
8128 in a supported SSE vector mode. */
8129
8130 int
8131 standard_sse_constant_p (rtx x)
8132 {
8133 enum machine_mode mode = GET_MODE (x);
8134
8135 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8136 return 1;
8137 if (vector_all_ones_operand (x, mode))
8138 switch (mode)
8139 {
8140 case V16QImode:
8141 case V8HImode:
8142 case V4SImode:
8143 case V2DImode:
8144 if (TARGET_SSE2)
8145 return 2;
8146 default:
8147 break;
8148 }
8149
8150 return 0;
8151 }
8152
8153 /* Return the opcode of the special instruction to be used to load
8154 the constant X. */
8155
8156 const char *
8157 standard_sse_constant_opcode (rtx insn, rtx x)
8158 {
8159 switch (standard_sse_constant_p (x))
8160 {
8161 case 1:
8162 switch (get_attr_mode (insn))
8163 {
8164 case MODE_TI:
8165 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8166 return "%vpxor\t%0, %d0";
8167 case MODE_V2DF:
8168 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8169 return "%vxorpd\t%0, %d0";
8170 case MODE_V4SF:
8171 return "%vxorps\t%0, %d0";
8172
8173 case MODE_OI:
8174 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8175 return "vpxor\t%x0, %x0, %x0";
8176 case MODE_V4DF:
8177 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8178 return "vxorpd\t%x0, %x0, %x0";
8179 case MODE_V8SF:
8180 return "vxorps\t%x0, %x0, %x0";
8181
8182 default:
8183 break;
8184 }
8185
8186 case 2:
8187 return "%vpcmpeqd\t%0, %d0";
8188 default:
8189 break;
8190 }
8191 gcc_unreachable ();
8192 }
8193
8194 /* Returns true if OP contains a symbol reference */
8195
8196 bool
8197 symbolic_reference_mentioned_p (rtx op)
8198 {
8199 const char *fmt;
8200 int i;
8201
8202 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8203 return true;
8204
8205 fmt = GET_RTX_FORMAT (GET_CODE (op));
8206 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8207 {
8208 if (fmt[i] == 'E')
8209 {
8210 int j;
8211
8212 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8213 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8214 return true;
8215 }
8216
8217 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8218 return true;
8219 }
8220
8221 return false;
8222 }
8223
8224 /* Return true if it is appropriate to emit `ret' instructions in the
8225 body of a function. Do this only if the epilogue is simple, needing a
8226 couple of insns. Prior to reloading, we can't tell how many registers
8227 must be saved, so return false then. Return false if there is no frame
8228 marker to de-allocate. */
8229
8230 bool
8231 ix86_can_use_return_insn_p (void)
8232 {
8233 struct ix86_frame frame;
8234
8235 if (! reload_completed || frame_pointer_needed)
8236 return 0;
8237
8238 /* Don't allow more than 32k pop, since that's all we can do
8239 with one instruction. */
8240 if (crtl->args.pops_args && crtl->args.size >= 32768)
8241 return 0;
8242
8243 ix86_compute_frame_layout (&frame);
8244 return (frame.stack_pointer_offset == UNITS_PER_WORD
8245 && (frame.nregs + frame.nsseregs) == 0);
8246 }
8247 \f
8248 /* Value should be nonzero if functions must have frame pointers.
8249 Zero means the frame pointer need not be set up (and parms may
8250 be accessed via the stack pointer) in functions that seem suitable. */
8251
8252 static bool
8253 ix86_frame_pointer_required (void)
8254 {
8255 /* If we accessed previous frames, then the generated code expects
8256 to be able to access the saved ebp value in our frame. */
8257 if (cfun->machine->accesses_prev_frame)
8258 return true;
8259
8260 /* Several x86 OSes need a frame pointer for other reasons,
8261 usually pertaining to setjmp. */
8262 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8263 return true;
8264
8265 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8266 turns off the frame pointer by default. Turn it back on now if
8267 we've not got a leaf function. */
8268 if (TARGET_OMIT_LEAF_FRAME_POINTER
8269 && (!current_function_is_leaf
8270 || ix86_current_function_calls_tls_descriptor))
8271 return true;
8272
8273 if (crtl->profile && !flag_fentry)
8274 return true;
8275
8276 return false;
8277 }
8278
8279 /* Record that the current function accesses previous call frames. */
8280
8281 void
8282 ix86_setup_frame_addresses (void)
8283 {
8284 cfun->machine->accesses_prev_frame = 1;
8285 }
8286 \f
8287 #ifndef USE_HIDDEN_LINKONCE
8288 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8289 # define USE_HIDDEN_LINKONCE 1
8290 # else
8291 # define USE_HIDDEN_LINKONCE 0
8292 # endif
8293 #endif
8294
8295 static int pic_labels_used;
8296
8297 /* Fills in the label name that should be used for a pc thunk for
8298 the given register. */
8299
8300 static void
8301 get_pc_thunk_name (char name[32], unsigned int regno)
8302 {
8303 gcc_assert (!TARGET_64BIT);
8304
8305 if (USE_HIDDEN_LINKONCE)
8306 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8307 else
8308 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8309 }
8310
8311
8312 /* This function generates the pc thunks used for -fpic; each thunk
8313 loads its register with the return address of the caller and then returns. */
8314
8315 static void
8316 ix86_code_end (void)
8317 {
8318 rtx xops[2];
8319 int regno;
8320
8321 #ifdef TARGET_SOLARIS
8322 solaris_code_end ();
8323 #endif
8324
8325 for (regno = AX_REG; regno <= SP_REG; regno++)
8326 {
8327 char name[32];
8328 tree decl;
8329
8330 if (!(pic_labels_used & (1 << regno)))
8331 continue;
8332
8333 get_pc_thunk_name (name, regno);
8334
8335 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8336 get_identifier (name),
8337 build_function_type_list (void_type_node, NULL_TREE));
8338 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8339 NULL_TREE, void_type_node);
8340 TREE_PUBLIC (decl) = 1;
8341 TREE_STATIC (decl) = 1;
8342
8343 #if TARGET_MACHO
8344 if (TARGET_MACHO)
8345 {
8346 switch_to_section (darwin_sections[text_coal_section]);
8347 fputs ("\t.weak_definition\t", asm_out_file);
8348 assemble_name (asm_out_file, name);
8349 fputs ("\n\t.private_extern\t", asm_out_file);
8350 assemble_name (asm_out_file, name);
8351 putc ('\n', asm_out_file);
8352 ASM_OUTPUT_LABEL (asm_out_file, name);
8353 DECL_WEAK (decl) = 1;
8354 }
8355 else
8356 #endif
8357 if (USE_HIDDEN_LINKONCE)
8358 {
8359 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8360
8361 targetm.asm_out.unique_section (decl, 0);
8362 switch_to_section (get_named_section (decl, NULL, 0));
8363
8364 targetm.asm_out.globalize_label (asm_out_file, name);
8365 fputs ("\t.hidden\t", asm_out_file);
8366 assemble_name (asm_out_file, name);
8367 putc ('\n', asm_out_file);
8368 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8369 }
8370 else
8371 {
8372 switch_to_section (text_section);
8373 ASM_OUTPUT_LABEL (asm_out_file, name);
8374 }
8375
8376 DECL_INITIAL (decl) = make_node (BLOCK);
8377 current_function_decl = decl;
8378 init_function_start (decl);
8379 first_function_block_is_cold = false;
8380 /* Make sure unwind info is emitted for the thunk if needed. */
8381 final_start_function (emit_barrier (), asm_out_file, 1);
8382
8383 /* Pad stack IP move with 4 instructions (two NOPs count
8384 as one instruction). */
8385 if (TARGET_PAD_SHORT_FUNCTION)
8386 {
8387 int i = 8;
8388
8389 while (i--)
8390 fputs ("\tnop\n", asm_out_file);
8391 }
8392
8393 xops[0] = gen_rtx_REG (Pmode, regno);
8394 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8395 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8396 fputs ("\tret\n", asm_out_file);
8397 final_end_function ();
8398 init_insn_lengths ();
8399 free_after_compilation (cfun);
8400 set_cfun (NULL);
8401 current_function_decl = NULL;
8402 }
8403
8404 if (flag_split_stack)
8405 file_end_indicate_split_stack ();
8406 }
8407
8408 /* Emit code for the SET_GOT patterns. */
8409
8410 const char *
8411 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8412 {
8413 rtx xops[3];
8414
8415 xops[0] = dest;
8416
8417 if (TARGET_VXWORKS_RTP && flag_pic)
8418 {
8419 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8420 xops[2] = gen_rtx_MEM (Pmode,
8421 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8422 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8423
8424 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8425 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8426 an unadorned address. */
8427 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8428 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8429 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8430 return "";
8431 }
8432
8433 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8434
8435 if (!flag_pic)
8436 {
8437 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8438
8439 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8440
8441 #if TARGET_MACHO
8442 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8443 is what will be referenced by the Mach-O PIC subsystem. */
8444 if (!label)
8445 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8446 #endif
8447
8448 targetm.asm_out.internal_label (asm_out_file, "L",
8449 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8450 }
8451 else
8452 {
8453 char name[32];
8454 get_pc_thunk_name (name, REGNO (dest));
8455 pic_labels_used |= 1 << REGNO (dest);
8456
8457 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8458 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8459 output_asm_insn ("call\t%X2", xops);
8460 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8461 is what will be referenced by the Mach-O PIC subsystem. */
8462 #if TARGET_MACHO
8463 if (!label)
8464 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8465 else
8466 targetm.asm_out.internal_label (asm_out_file, "L",
8467 CODE_LABEL_NUMBER (label));
8468 #endif
8469 }
8470
8471 if (!TARGET_MACHO)
8472 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8473
8474 return "";
8475 }
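/* For reference, the typical 32-bit -fpic sequence produced through the
   code above (a sketch, assuming %ebx is the PIC register and
   GOT_SYMBOL_NAME expands to _GLOBAL_OFFSET_TABLE_) is roughly:

       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk body emitted by ix86_code_end is just
   "movl (%esp), %ebx; ret".  */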
8476
8477 /* Generate an "push" pattern for input ARG. */
8478
8479 static rtx
8480 gen_push (rtx arg)
8481 {
8482 struct machine_function *m = cfun->machine;
8483
8484 if (m->fs.cfa_reg == stack_pointer_rtx)
8485 m->fs.cfa_offset += UNITS_PER_WORD;
8486 m->fs.sp_offset += UNITS_PER_WORD;
8487
8488 return gen_rtx_SET (VOIDmode,
8489 gen_rtx_MEM (Pmode,
8490 gen_rtx_PRE_DEC (Pmode,
8491 stack_pointer_rtx)),
8492 arg);
8493 }
8494
8495 /* Generate an "pop" pattern for input ARG. */
8496
8497 static rtx
8498 gen_pop (rtx arg)
8499 {
8500 return gen_rtx_SET (VOIDmode,
8501 arg,
8502 gen_rtx_MEM (Pmode,
8503 gen_rtx_POST_INC (Pmode,
8504 stack_pointer_rtx)));
8505 }
8506
8507 /* Return the number of an unused call-clobbered register available
8508 for the entire function, or INVALID_REGNUM if there is none. */
8509
8510 static unsigned int
8511 ix86_select_alt_pic_regnum (void)
8512 {
8513 if (current_function_is_leaf
8514 && !crtl->profile
8515 && !ix86_current_function_calls_tls_descriptor)
8516 {
8517 int i, drap;
8518 /* Can't use the same register for both PIC and DRAP. */
8519 if (crtl->drap_reg)
8520 drap = REGNO (crtl->drap_reg);
8521 else
8522 drap = -1;
8523 for (i = 2; i >= 0; --i)
8524 if (i != drap && !df_regs_ever_live_p (i))
8525 return i;
8526 }
8527
8528 return INVALID_REGNUM;
8529 }
8530
8531 /* Return TRUE if we need to save REGNO. */
8532
8533 static bool
8534 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8535 {
8536 if (pic_offset_table_rtx
8537 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8538 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8539 || crtl->profile
8540 || crtl->calls_eh_return
8541 || crtl->uses_const_pool))
8542 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8543
8544 if (crtl->calls_eh_return && maybe_eh_return)
8545 {
8546 unsigned i;
8547 for (i = 0; ; i++)
8548 {
8549 unsigned test = EH_RETURN_DATA_REGNO (i);
8550 if (test == INVALID_REGNUM)
8551 break;
8552 if (test == regno)
8553 return true;
8554 }
8555 }
8556
8557 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8558 return true;
8559
8560 return (df_regs_ever_live_p (regno)
8561 && !call_used_regs[regno]
8562 && !fixed_regs[regno]
8563 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8564 }
8565
8566 /* Return the number of saved general purpose registers. */
8567
8568 static int
8569 ix86_nsaved_regs (void)
8570 {
8571 int nregs = 0;
8572 int regno;
8573
8574 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8575 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8576 nregs ++;
8577 return nregs;
8578 }
8579
8580 /* Return the number of saved SSE registers. */
8581
8582 static int
8583 ix86_nsaved_sseregs (void)
8584 {
8585 int nregs = 0;
8586 int regno;
8587
8588 if (!TARGET_64BIT_MS_ABI)
8589 return 0;
8590 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8591 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8592 nregs ++;
8593 return nregs;
8594 }
8595
8596 /* Given FROM and TO register numbers, say whether this elimination is
8597 allowed. If stack alignment is needed, we can only replace argument
8598 pointer with hard frame pointer, or replace frame pointer with stack
8599 pointer. Otherwise, frame pointer elimination is automatically
8600 handled and all other eliminations are valid. */
8601
8602 static bool
8603 ix86_can_eliminate (const int from, const int to)
8604 {
8605 if (stack_realign_fp)
8606 return ((from == ARG_POINTER_REGNUM
8607 && to == HARD_FRAME_POINTER_REGNUM)
8608 || (from == FRAME_POINTER_REGNUM
8609 && to == STACK_POINTER_REGNUM));
8610 else
8611 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8612 }
8613
8614 /* Return the offset between two registers, one to be eliminated, and the other
8615 its replacement, at the start of a routine. */
8616
8617 HOST_WIDE_INT
8618 ix86_initial_elimination_offset (int from, int to)
8619 {
8620 struct ix86_frame frame;
8621 ix86_compute_frame_layout (&frame);
8622
8623 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8624 return frame.hard_frame_pointer_offset;
8625 else if (from == FRAME_POINTER_REGNUM
8626 && to == HARD_FRAME_POINTER_REGNUM)
8627 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8628 else
8629 {
8630 gcc_assert (to == STACK_POINTER_REGNUM);
8631
8632 if (from == ARG_POINTER_REGNUM)
8633 return frame.stack_pointer_offset;
8634
8635 gcc_assert (from == FRAME_POINTER_REGNUM);
8636 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8637 }
8638 }
8639
8640 /* In a dynamically-aligned function, we can't know the offset from
8641 stack pointer to frame pointer, so we must ensure that setjmp
8642 eliminates fp against the hard fp (%ebp) rather than trying to
8643 index from %esp up to the top of the frame across a gap that is
8644 of unknown (at compile-time) size. */
8645 static rtx
8646 ix86_builtin_setjmp_frame_value (void)
8647 {
8648 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8649 }
8650
8651 /* When using -fsplit-stack, the allocation routines set a field in
8652 the TCB to the bottom of the stack plus this much space, measured
8653 in bytes. */
8654
8655 #define SPLIT_STACK_AVAILABLE 256
8656
8657 /* Fill structure ix86_frame about frame of currently computed function. */
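/* A sketch of the areas laid out by the offset computation below, from
   the return address downwards (any of them may be absent):

     return address
     pushed static chain
     saved frame pointer         (frame->hard_frame_pointer_offset)
     GPR save area               (frame->reg_save_offset)
     padding + SSE save area     (frame->sse_reg_save_offset)
     va_arg register save area
     aligned local variables     (frame->frame_pointer_offset)
     outgoing arguments area  */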
8658
8659 static void
8660 ix86_compute_frame_layout (struct ix86_frame *frame)
8661 {
8662 unsigned int stack_alignment_needed;
8663 HOST_WIDE_INT offset;
8664 unsigned int preferred_alignment;
8665 HOST_WIDE_INT size = get_frame_size ();
8666 HOST_WIDE_INT to_allocate;
8667
8668 frame->nregs = ix86_nsaved_regs ();
8669 frame->nsseregs = ix86_nsaved_sseregs ();
8670
8671 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8672 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8673
8674 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
8675 except in function prologues and leaf functions. */
8676 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8677 && (!current_function_is_leaf || cfun->calls_alloca != 0
8678 || ix86_current_function_calls_tls_descriptor))
8679 {
8680 preferred_alignment = 16;
8681 stack_alignment_needed = 16;
8682 crtl->preferred_stack_boundary = 128;
8683 crtl->stack_alignment_needed = 128;
8684 }
8685
8686 gcc_assert (!size || stack_alignment_needed);
8687 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8688 gcc_assert (preferred_alignment <= stack_alignment_needed);
8689
8690 /* For SEH we have to limit the amount of code movement into the prologue.
8691 At present we do this via a BLOCKAGE, at which point there's very little
8692 scheduling that can be done, which means that there's very little point
8693 in doing anything except PUSHs. */
8694 if (TARGET_SEH)
8695 cfun->machine->use_fast_prologue_epilogue = false;
8696
8697 /* During reload iterations the number of registers saved can change.
8698 Recompute the value as needed. Do not recompute when the number of registers
8699 didn't change, as reload makes multiple calls to this function and does not
8700 expect the decision to change within a single iteration. */
8701 else if (!optimize_function_for_size_p (cfun)
8702 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8703 {
8704 int count = frame->nregs;
8705 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8706
8707 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8708
8709 /* The fast prologue uses move instead of push to save registers. This
8710 is significantly longer, but also executes faster as modern hardware
8711 can execute the moves in parallel, but can't do that for push/pop.
8712
8713 Be careful about choosing which prologue to emit: when the function takes
8714 many instructions to execute we may use the slow version, as well as when
8715 the function is known to be outside a hot spot (known only with profile
8716 feedback). Weight the size of the function by the number of registers
8717 to save, as it is cheap to use one or two push instructions but very
8718 slow to use many of them. */
8719 if (count)
8720 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8721 if (node->frequency < NODE_FREQUENCY_NORMAL
8722 || (flag_branch_probabilities
8723 && node->frequency < NODE_FREQUENCY_HOT))
8724 cfun->machine->use_fast_prologue_epilogue = false;
8725 else
8726 cfun->machine->use_fast_prologue_epilogue
8727 = !expensive_function_p (count);
8728 }
8729 if (TARGET_PROLOGUE_USING_MOVE
8730 && cfun->machine->use_fast_prologue_epilogue)
8731 frame->save_regs_using_mov = true;
8732 else
8733 frame->save_regs_using_mov = false;
8734
8735 /* If static stack checking is enabled and done with probes, the registers
8736 need to be saved before allocating the frame. */
8737 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8738 frame->save_regs_using_mov = false;
8739
8740 /* Skip return address. */
8741 offset = UNITS_PER_WORD;
8742
8743 /* Skip pushed static chain. */
8744 if (ix86_static_chain_on_stack)
8745 offset += UNITS_PER_WORD;
8746
8747 /* Skip saved base pointer. */
8748 if (frame_pointer_needed)
8749 offset += UNITS_PER_WORD;
8750 frame->hfp_save_offset = offset;
8751
8752 /* The traditional frame pointer location is at the top of the frame. */
8753 frame->hard_frame_pointer_offset = offset;
8754
8755 /* Register save area */
8756 offset += frame->nregs * UNITS_PER_WORD;
8757 frame->reg_save_offset = offset;
8758
8759 /* Align and set SSE register save area. */
8760 if (frame->nsseregs)
8761 {
8762 /* The only ABI that has saved SSE registers (Win64) also has a
8763 16-byte aligned default stack, and thus we don't need to be
8764 within the re-aligned local stack frame to save them. */
8765 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8766 offset = (offset + 16 - 1) & -16;
8767 offset += frame->nsseregs * 16;
8768 }
8769 frame->sse_reg_save_offset = offset;
8770
8771 /* The re-aligned stack starts here. Values before this point are not
8772 directly comparable with values below this point. In order to make
8773 sure that no value happens to be the same before and after, force
8774 the alignment computation below to add a non-zero value. */
8775 if (stack_realign_fp)
8776 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
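      /* For illustration: with 16-byte stack_alignment_needed, an offset of 32
	 becomes 48 here, never 32 again, which is what guarantees the
	 distinct-value property described in the comment above.  */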
8777
8778 /* Va-arg area */
8779 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8780 offset += frame->va_arg_size;
8781
8782 /* Align start of frame for local function. */
8783 if (stack_realign_fp
8784 || offset != frame->sse_reg_save_offset
8785 || size != 0
8786 || !current_function_is_leaf
8787 || cfun->calls_alloca
8788 || ix86_current_function_calls_tls_descriptor)
8789 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8790
8791 /* Frame pointer points here. */
8792 frame->frame_pointer_offset = offset;
8793
8794 offset += size;
8795
8796 /* Add outgoing arguments area. Can be skipped if we eliminated
8797 all the function calls as dead code.
8798 Skipping is however impossible when function calls alloca. Alloca
8799 expander assumes that last crtl->outgoing_args_size
8800 of stack frame are unused. */
8801 if (ACCUMULATE_OUTGOING_ARGS
8802 && (!current_function_is_leaf || cfun->calls_alloca
8803 || ix86_current_function_calls_tls_descriptor))
8804 {
8805 offset += crtl->outgoing_args_size;
8806 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8807 }
8808 else
8809 frame->outgoing_arguments_size = 0;
8810
8811 /* Align stack boundary. Only needed if we're calling another function
8812 or using alloca. */
8813 if (!current_function_is_leaf || cfun->calls_alloca
8814 || ix86_current_function_calls_tls_descriptor)
8815 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8816
8817 /* We've reached end of stack frame. */
8818 frame->stack_pointer_offset = offset;
8819
8820 /* Size prologue needs to allocate. */
8821 to_allocate = offset - frame->sse_reg_save_offset;
8822
8823 if ((!to_allocate && frame->nregs <= 1)
8824 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8825 frame->save_regs_using_mov = false;
8826
8827 if (ix86_using_red_zone ()
8828 && current_function_sp_is_unchanging
8829 && current_function_is_leaf
8830 && !ix86_current_function_calls_tls_descriptor)
8831 {
8832 frame->red_zone_size = to_allocate;
8833 if (frame->save_regs_using_mov)
8834 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8835 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8836 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8837 }
8838 else
8839 frame->red_zone_size = 0;
8840 frame->stack_pointer_offset -= frame->red_zone_size;
8841
8842 /* The SEH frame pointer location is near the bottom of the frame.
8843 This is enforced by the fact that the difference between the
8844 stack pointer and the frame pointer is limited to 240 bytes in
8845 the unwind data structure. */
8846 if (TARGET_SEH)
8847 {
8848 HOST_WIDE_INT diff;
8849
8850 /* If we can leave the frame pointer where it is, do so. */
8851 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
8852 if (diff > 240 || (diff & 15) != 0)
8853 {
8854 /* Ideally we'd determine what portion of the local stack frame
8855 (within the constraint of the lowest 240) is most heavily used.
8856 But without that complication, simply bias the frame pointer
8857 by 128 bytes so as to maximize the amount of the local stack
8858 frame that is addressable with 8-bit offsets. */
8859 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
8860 }
8861 }
8862 }
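/* As a rough sketch (not normative, just a summary of the computation above),
   the offsets describe a frame laid out top-down as follows, each offset being
   measured downward from the slot just above the return address; bracketed
   areas may be empty:

	return address
	[pushed static chain]
	[saved hard frame pointer]	hfp_save_offset / hard_frame_pointer_offset
	integer register save area	reg_save_offset
	[SSE register save area]	sse_reg_save_offset (16-byte aligned)
	[va_arg register save area]
	local variables			frame_pointer_offset at their top
	[outgoing argument area]
	end of frame			stack_pointer_offset (before the final
					red-zone adjustment)  */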
8863
8864 /* This is semi-inlined memory_address_length, but simplified
8865 since we know that we're always dealing with reg+offset, and
8866 to avoid having to create and discard all that rtl. */
8867
8868 static inline int
8869 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8870 {
8871 int len = 4;
8872
8873 if (offset == 0)
8874 {
8875 /* EBP and R13 cannot be encoded without an offset. */
8876 len = (regno == BP_REG || regno == R13_REG);
8877 }
8878 else if (IN_RANGE (offset, -128, 127))
8879 len = 1;
8880
8881 /* ESP and R12 must be encoded with a SIB byte. */
8882 if (regno == SP_REG || regno == R12_REG)
8883 len++;
8884
8885 return len;
8886 }
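/* A few examples of the encoding rules above (a sketch, lengths in bytes of
   the displacement/SIB part only):
	choose_baseaddr_len (SP_REG, 0)   == 1	(SIB byte, no displacement)
	choose_baseaddr_len (BP_REG, 0)   == 1	(forced disp8 of zero)
	choose_baseaddr_len (BX_REG, 64)  == 1	(disp8)
	choose_baseaddr_len (BX_REG, 512) == 4	(disp32)
	choose_baseaddr_len (SP_REG, 512) == 5	(SIB byte + disp32).  */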
8887
8888 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8889 The valid base registers are taken from CFUN->MACHINE->FS. */
8890
8891 static rtx
8892 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8893 {
8894 const struct machine_function *m = cfun->machine;
8895 rtx base_reg = NULL;
8896 HOST_WIDE_INT base_offset = 0;
8897
8898 if (m->use_fast_prologue_epilogue)
8899 {
8900 /* Choose the base register most likely to allow the most scheduling
8901 opportunities. Generally FP is valid throughout the function,
8902 while DRAP must be reloaded within the epilogue. But choose either
8903 over the SP due to increased encoding size. */
8904
8905 if (m->fs.fp_valid)
8906 {
8907 base_reg = hard_frame_pointer_rtx;
8908 base_offset = m->fs.fp_offset - cfa_offset;
8909 }
8910 else if (m->fs.drap_valid)
8911 {
8912 base_reg = crtl->drap_reg;
8913 base_offset = 0 - cfa_offset;
8914 }
8915 else if (m->fs.sp_valid)
8916 {
8917 base_reg = stack_pointer_rtx;
8918 base_offset = m->fs.sp_offset - cfa_offset;
8919 }
8920 }
8921 else
8922 {
8923 HOST_WIDE_INT toffset;
8924 int len = 16, tlen;
8925
8926 /* Choose the base register with the smallest address encoding.
8927 With a tie, choose FP > DRAP > SP. */
8928 if (m->fs.sp_valid)
8929 {
8930 base_reg = stack_pointer_rtx;
8931 base_offset = m->fs.sp_offset - cfa_offset;
8932 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8933 }
8934 if (m->fs.drap_valid)
8935 {
8936 toffset = 0 - cfa_offset;
8937 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8938 if (tlen <= len)
8939 {
8940 base_reg = crtl->drap_reg;
8941 base_offset = toffset;
8942 len = tlen;
8943 }
8944 }
8945 if (m->fs.fp_valid)
8946 {
8947 toffset = m->fs.fp_offset - cfa_offset;
8948 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8949 if (tlen <= len)
8950 {
8951 base_reg = hard_frame_pointer_rtx;
8952 base_offset = toffset;
8953 len = tlen;
8954 }
8955 }
8956 }
8957 gcc_assert (base_reg != NULL);
8958
8959 return plus_constant (base_reg, base_offset);
8960 }
8961
8962 /* Emit code to save registers in the prologue. */
8963
8964 static void
8965 ix86_emit_save_regs (void)
8966 {
8967 unsigned int regno;
8968 rtx insn;
8969
8970 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8971 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8972 {
8973 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8974 RTX_FRAME_RELATED_P (insn) = 1;
8975 }
8976 }
8977
8978 /* Emit a single register save at CFA - CFA_OFFSET. */
8979
8980 static void
8981 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8982 HOST_WIDE_INT cfa_offset)
8983 {
8984 struct machine_function *m = cfun->machine;
8985 rtx reg = gen_rtx_REG (mode, regno);
8986 rtx mem, addr, base, insn;
8987
8988 addr = choose_baseaddr (cfa_offset);
8989 mem = gen_frame_mem (mode, addr);
8990
8991 /* For SSE saves, we need to indicate the 128-bit alignment. */
8992 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8993
8994 insn = emit_move_insn (mem, reg);
8995 RTX_FRAME_RELATED_P (insn) = 1;
8996
8997 base = addr;
8998 if (GET_CODE (base) == PLUS)
8999 base = XEXP (base, 0);
9000 gcc_checking_assert (REG_P (base));
9001
9002 /* When saving registers into a re-aligned local stack frame, avoid
9003 any tricky guessing by dwarf2out. */
9004 if (m->fs.realigned)
9005 {
9006 gcc_checking_assert (stack_realign_drap);
9007
9008 if (regno == REGNO (crtl->drap_reg))
9009 {
9010 /* A bit of a hack. We force the DRAP register to be saved in
9011 the re-aligned stack frame, which provides us with a copy
9012 of the CFA that will last past the prologue. Install it. */
9013 gcc_checking_assert (cfun->machine->fs.fp_valid);
9014 addr = plus_constant (hard_frame_pointer_rtx,
9015 cfun->machine->fs.fp_offset - cfa_offset);
9016 mem = gen_rtx_MEM (mode, addr);
9017 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9018 }
9019 else
9020 {
9021 /* The frame pointer is a stable reference within the
9022 aligned frame. Use it. */
9023 gcc_checking_assert (cfun->machine->fs.fp_valid);
9024 addr = plus_constant (hard_frame_pointer_rtx,
9025 cfun->machine->fs.fp_offset - cfa_offset);
9026 mem = gen_rtx_MEM (mode, addr);
9027 add_reg_note (insn, REG_CFA_EXPRESSION,
9028 gen_rtx_SET (VOIDmode, mem, reg));
9029 }
9030 }
9031
9032 /* The memory may not be relative to the current CFA register,
9033 which means that we may need to generate a new pattern for
9034 use by the unwind info. */
9035 else if (base != m->fs.cfa_reg)
9036 {
9037 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9038 mem = gen_rtx_MEM (mode, addr);
9039 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9040 }
9041 }
9042
9043 /* Emit code to save registers using MOV insns.
9044 First register is stored at CFA - CFA_OFFSET. */
9045 static void
9046 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9047 {
9048 unsigned int regno;
9049
9050 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9051 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9052 {
9053 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9054 cfa_offset -= UNITS_PER_WORD;
9055 }
9056 }
9057
9058 /* Emit code to save SSE registers using MOV insns.
9059 First register is stored at CFA - CFA_OFFSET. */
9060 static void
9061 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9062 {
9063 unsigned int regno;
9064
9065 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9066 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9067 {
9068 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9069 cfa_offset -= 16;
9070 }
9071 }
9072
9073 static GTY(()) rtx queued_cfa_restores;
9074
9075 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next stack
9076 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9077 Don't add the note if the previously saved value will be left untouched
9078 within the stack red zone until return, as unwinders can find the same value
9079 in the register and on the stack. */
9080
9081 static void
9082 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9083 {
9084 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9085 return;
9086
9087 if (insn)
9088 {
9089 add_reg_note (insn, REG_CFA_RESTORE, reg);
9090 RTX_FRAME_RELATED_P (insn) = 1;
9091 }
9092 else
9093 queued_cfa_restores
9094 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9095 }
9096
9097 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9098
9099 static void
9100 ix86_add_queued_cfa_restore_notes (rtx insn)
9101 {
9102 rtx last;
9103 if (!queued_cfa_restores)
9104 return;
9105 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9106 ;
9107 XEXP (last, 1) = REG_NOTES (insn);
9108 REG_NOTES (insn) = queued_cfa_restores;
9109 queued_cfa_restores = NULL_RTX;
9110 RTX_FRAME_RELATED_P (insn) = 1;
9111 }
9112
9113 /* Expand prologue or epilogue stack adjustment.
9114 The pattern exists to put a dependency on all ebp-based memory accesses.
9115 STYLE should be negative if instructions should be marked as frame related,
9116 zero if %r11 register is live and cannot be freely used and positive
9117 otherwise. */
9118
9119 static void
9120 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9121 int style, bool set_cfa)
9122 {
9123 struct machine_function *m = cfun->machine;
9124 rtx insn;
9125 bool add_frame_related_expr = false;
9126
9127 if (! TARGET_64BIT)
9128 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9129 else if (x86_64_immediate_operand (offset, DImode))
9130 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9131 else
9132 {
9133 rtx tmp;
9134 /* r11 is used by indirect sibcall return as well, set before the
9135 epilogue and used after the epilogue. */
9136 if (style)
9137 tmp = gen_rtx_REG (DImode, R11_REG);
9138 else
9139 {
9140 gcc_assert (src != hard_frame_pointer_rtx
9141 && dest != hard_frame_pointer_rtx);
9142 tmp = hard_frame_pointer_rtx;
9143 }
9144 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9145 if (style < 0)
9146 add_frame_related_expr = true;
9147
9148 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9149 }
9150
9151 insn = emit_insn (insn);
9152 if (style >= 0)
9153 ix86_add_queued_cfa_restore_notes (insn);
9154
9155 if (set_cfa)
9156 {
9157 rtx r;
9158
9159 gcc_assert (m->fs.cfa_reg == src);
9160 m->fs.cfa_offset += INTVAL (offset);
9161 m->fs.cfa_reg = dest;
9162
9163 r = gen_rtx_PLUS (Pmode, src, offset);
9164 r = gen_rtx_SET (VOIDmode, dest, r);
9165 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9166 RTX_FRAME_RELATED_P (insn) = 1;
9167 }
9168 else if (style < 0)
9169 {
9170 RTX_FRAME_RELATED_P (insn) = 1;
9171 if (add_frame_related_expr)
9172 {
9173 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9174 r = gen_rtx_SET (VOIDmode, dest, r);
9175 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9176 }
9177 }
9178
9179 if (dest == stack_pointer_rtx)
9180 {
9181 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9182 bool valid = m->fs.sp_valid;
9183
9184 if (src == hard_frame_pointer_rtx)
9185 {
9186 valid = m->fs.fp_valid;
9187 ooffset = m->fs.fp_offset;
9188 }
9189 else if (src == crtl->drap_reg)
9190 {
9191 valid = m->fs.drap_valid;
9192 ooffset = 0;
9193 }
9194 else
9195 {
9196 /* Else there are two possibilities: SP itself, which we set
9197 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9198 taken care of by hand along the eh_return path. */
9199 gcc_checking_assert (src == stack_pointer_rtx
9200 || offset == const0_rtx);
9201 }
9202
9203 m->fs.sp_offset = ooffset - INTVAL (offset);
9204 m->fs.sp_valid = valid;
9205 }
9206 }
9207
9208 /* Find an available register to be used as dynamic realign argument
9209 pointer register. Such a register will be written in the prologue and
9210 used at the beginning of the body, so it must not be
9211 1. a parameter passing register, or
9212 2. the GOT pointer.
9213 We reuse static-chain register if it is available. Otherwise, we
9214 use DI for i386 and R13 for x86-64. We chose R13 since it has
9215 shorter encoding.
9216
9217 Return: the regno of chosen register. */
9218
9219 static unsigned int
9220 find_drap_reg (void)
9221 {
9222 tree decl = cfun->decl;
9223
9224 if (TARGET_64BIT)
9225 {
9226 /* Use R13 for a nested function or a function that needs a static
9227 chain. Since a function with a tail call may use any caller-saved
9228 register in the epilogue, DRAP must not use a caller-saved
9229 register in that case. */
9230 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9231 return R13_REG;
9232
9233 return R10_REG;
9234 }
9235 else
9236 {
9237 /* Use DI for a nested function or a function that needs a static
9238 chain. Since a function with a tail call may use any caller-saved
9239 register in the epilogue, DRAP must not use a caller-saved
9240 register in that case. */
9241 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9242 return DI_REG;
9243
9244 /* Reuse the static chain register if it isn't used for parameter
9245 passing. */
9246 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9247 {
9248 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9249 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9250 return CX_REG;
9251 }
9252 return DI_REG;
9253 }
9254 }
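/* In summary, the choices made above are: on x86-64, R13 when a static chain
   or tail call is involved and R10 otherwise; in 32-bit mode, DI when a static
   chain or tail call is involved, CX when at most two regparm registers are
   used and the calling convention is neither fastcall nor thiscall, and DI as
   the fallback.  */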
9255
9256 /* Return minimum incoming stack alignment. */
9257
9258 static unsigned int
9259 ix86_minimum_incoming_stack_boundary (bool sibcall)
9260 {
9261 unsigned int incoming_stack_boundary;
9262
9263 /* Prefer the one specified at command line. */
9264 if (ix86_user_incoming_stack_boundary)
9265 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9266 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9267 if -mstackrealign is used, this isn't the sibcall check, and the
9268 estimated stack alignment is 128 bits. */
9269 else if (!sibcall
9270 && !TARGET_64BIT
9271 && ix86_force_align_arg_pointer
9272 && crtl->stack_alignment_estimated == 128)
9273 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9274 else
9275 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9276
9277 /* Incoming stack alignment can be changed on individual functions
9278 via force_align_arg_pointer attribute. We use the smallest
9279 incoming stack boundary. */
9280 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9281 && lookup_attribute (ix86_force_align_arg_pointer_string,
9282 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9283 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9284
9285 /* The incoming stack frame has to be aligned at least at
9286 parm_stack_boundary. */
9287 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9288 incoming_stack_boundary = crtl->parm_stack_boundary;
9289
9290 /* The stack at entry to main is aligned by the runtime. We use the
9291 smallest incoming stack boundary. */
9292 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9293 && DECL_NAME (current_function_decl)
9294 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9295 && DECL_FILE_SCOPE_P (current_function_decl))
9296 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9297
9298 return incoming_stack_boundary;
9299 }
9300
9301 /* Update incoming stack boundary and estimated stack alignment. */
9302
9303 static void
9304 ix86_update_stack_boundary (void)
9305 {
9306 ix86_incoming_stack_boundary
9307 = ix86_minimum_incoming_stack_boundary (false);
9308
9309 /* x86_64 vararg needs 16byte stack alignment for register save
9310 area. */
9311 if (TARGET_64BIT
9312 && cfun->stdarg
9313 && crtl->stack_alignment_estimated < 128)
9314 crtl->stack_alignment_estimated = 128;
9315 }
9316
9317 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9318 needed or an rtx for DRAP otherwise. */
9319
9320 static rtx
9321 ix86_get_drap_rtx (void)
9322 {
9323 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9324 crtl->need_drap = true;
9325
9326 if (stack_realign_drap)
9327 {
9328 /* Assign DRAP to vDRAP and return vDRAP. */
9329 unsigned int regno = find_drap_reg ();
9330 rtx drap_vreg;
9331 rtx arg_ptr;
9332 rtx seq, insn;
9333
9334 arg_ptr = gen_rtx_REG (Pmode, regno);
9335 crtl->drap_reg = arg_ptr;
9336
9337 start_sequence ();
9338 drap_vreg = copy_to_reg (arg_ptr);
9339 seq = get_insns ();
9340 end_sequence ();
9341
9342 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9343 if (!optimize)
9344 {
9345 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9346 RTX_FRAME_RELATED_P (insn) = 1;
9347 }
9348 return drap_vreg;
9349 }
9350 else
9351 return NULL;
9352 }
9353
9354 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9355
9356 static rtx
9357 ix86_internal_arg_pointer (void)
9358 {
9359 return virtual_incoming_args_rtx;
9360 }
9361
9362 struct scratch_reg {
9363 rtx reg;
9364 bool saved;
9365 };
9366
9367 /* Return a short-lived scratch register for use on function entry.
9368 In 32-bit mode, it is valid only after the registers are saved
9369 in the prologue. This register must be released by means of
9370 release_scratch_register_on_entry once it is dead. */
9371
9372 static void
9373 get_scratch_register_on_entry (struct scratch_reg *sr)
9374 {
9375 int regno;
9376
9377 sr->saved = false;
9378
9379 if (TARGET_64BIT)
9380 {
9381 /* We always use R11 in 64-bit mode. */
9382 regno = R11_REG;
9383 }
9384 else
9385 {
9386 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9387 bool fastcall_p
9388 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9389 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9390 int regparm = ix86_function_regparm (fntype, decl);
9391 int drap_regno
9392 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9393
9394 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9395 for the static chain register. */
9396 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9397 && drap_regno != AX_REG)
9398 regno = AX_REG;
9399 else if (regparm < 2 && drap_regno != DX_REG)
9400 regno = DX_REG;
9401 /* ecx is the static chain register. */
9402 else if (regparm < 3 && !fastcall_p && !static_chain_p
9403 && drap_regno != CX_REG)
9404 regno = CX_REG;
9405 else if (ix86_save_reg (BX_REG, true))
9406 regno = BX_REG;
9407 /* esi is the static chain register. */
9408 else if (!(regparm == 3 && static_chain_p)
9409 && ix86_save_reg (SI_REG, true))
9410 regno = SI_REG;
9411 else if (ix86_save_reg (DI_REG, true))
9412 regno = DI_REG;
9413 else
9414 {
9415 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9416 sr->saved = true;
9417 }
9418 }
9419
9420 sr->reg = gen_rtx_REG (Pmode, regno);
9421 if (sr->saved)
9422 {
9423 rtx insn = emit_insn (gen_push (sr->reg));
9424 RTX_FRAME_RELATED_P (insn) = 1;
9425 }
9426 }
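/* In outline, the 32-bit search order above is: eax, edx, ecx (whenever they
   are not needed for argument passing, the static chain, or DRAP), then a
   callee-saved register that is being saved anyway (ebx, esi, edi), and as a
   last resort a register that has to be pushed and popped explicitly around
   its use.  */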
9427
9428 /* Release a scratch register obtained from the preceding function. */
9429
9430 static void
9431 release_scratch_register_on_entry (struct scratch_reg *sr)
9432 {
9433 if (sr->saved)
9434 {
9435 rtx x, insn = emit_insn (gen_pop (sr->reg));
9436
9437 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9438 RTX_FRAME_RELATED_P (insn) = 1;
9439 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9440 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9441 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9442 }
9443 }
9444
9445 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
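/* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12 this is 4096 bytes,
   i.e. one probe per page.  */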
9446
9447 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9448
9449 static void
9450 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9451 {
9452 /* We skip the probe for the first interval + a small dope of 4 words and
9453 probe that many bytes past the specified size to maintain a protection
9454 area at the bottom of the stack. */
9455 const int dope = 4 * UNITS_PER_WORD;
9456 rtx size_rtx = GEN_INT (size), last;
9457
9458 /* See if we have a constant small number of probes to generate. If so,
9459 that's the easy case. The run-time loop is made up of 11 insns in the
9460 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9461 for n # of intervals. */
9462 if (size <= 5 * PROBE_INTERVAL)
9463 {
9464 HOST_WIDE_INT i, adjust;
9465 bool first_probe = true;
9466
9467 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9468 values of N from 1 until it exceeds SIZE. If only one probe is
9469 needed, this will not generate any code. Then adjust and probe
9470 to PROBE_INTERVAL + SIZE. */
9471 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9472 {
9473 if (first_probe)
9474 {
9475 adjust = 2 * PROBE_INTERVAL + dope;
9476 first_probe = false;
9477 }
9478 else
9479 adjust = PROBE_INTERVAL;
9480
9481 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9482 plus_constant (stack_pointer_rtx, -adjust)));
9483 emit_stack_probe (stack_pointer_rtx);
9484 }
9485
9486 if (first_probe)
9487 adjust = size + PROBE_INTERVAL + dope;
9488 else
9489 adjust = size + PROBE_INTERVAL - i;
9490
9491 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9492 plus_constant (stack_pointer_rtx, -adjust)));
9493 emit_stack_probe (stack_pointer_rtx);
9494
9495 /* Adjust back to account for the additional first interval. */
9496 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9497 plus_constant (stack_pointer_rtx,
9498 PROBE_INTERVAL + dope)));
9499 }
9500
9501 /* Otherwise, do the same as above, but in a loop. Note that we must be
9502 extra careful with variables wrapping around because we might be at
9503 the very top (or the very bottom) of the address space and we have
9504 to be able to handle this case properly; in particular, we use an
9505 equality test for the loop condition. */
9506 else
9507 {
9508 HOST_WIDE_INT rounded_size;
9509 struct scratch_reg sr;
9510
9511 get_scratch_register_on_entry (&sr);
9512
9513
9514 /* Step 1: round SIZE to the previous multiple of the interval. */
9515
9516 rounded_size = size & -PROBE_INTERVAL;
9517
9518
9519 /* Step 2: compute initial and final value of the loop counter. */
9520
9521 /* SP = SP_0 + PROBE_INTERVAL. */
9522 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9523 plus_constant (stack_pointer_rtx,
9524 - (PROBE_INTERVAL + dope))));
9525
9526 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9527 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9528 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9529 gen_rtx_PLUS (Pmode, sr.reg,
9530 stack_pointer_rtx)));
9531
9532
9533 /* Step 3: the loop
9534
9535 while (SP != LAST_ADDR)
9536 {
9537 SP = SP + PROBE_INTERVAL
9538 probe at SP
9539 }
9540
9541 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9542 values of N from 1 until it is equal to ROUNDED_SIZE. */
9543
9544 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9545
9546
9547 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9548 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9549
9550 if (size != rounded_size)
9551 {
9552 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9553 plus_constant (stack_pointer_rtx,
9554 rounded_size - size)));
9555 emit_stack_probe (stack_pointer_rtx);
9556 }
9557
9558 /* Adjust back to account for the additional first interval. */
9559 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9560 plus_constant (stack_pointer_rtx,
9561 PROBE_INTERVAL + dope)));
9562
9563 release_scratch_register_on_entry (&sr);
9564 }
9565
9566 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9567
9568 /* Even if the stack pointer isn't the CFA register, we need to correctly
9569 describe the adjustments made to it, in particular differentiate the
9570 frame-related ones from the frame-unrelated ones. */
9571 if (size > 0)
9572 {
9573 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9574 XVECEXP (expr, 0, 0)
9575 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9576 plus_constant (stack_pointer_rtx, -size));
9577 XVECEXP (expr, 0, 1)
9578 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9579 plus_constant (stack_pointer_rtx,
9580 PROBE_INTERVAL + dope + size));
9581 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9582 RTX_FRAME_RELATED_P (last) = 1;
9583
9584 cfun->machine->fs.sp_offset += size;
9585 }
9586
9587 /* Make sure nothing is scheduled before we are done. */
9588 emit_insn (gen_blockage ());
9589 }
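/* A worked example of the constant-probe path above (a sketch of the net
   effect, assuming PROBE_INTERVAL == 4096 and 64-bit words, so dope == 32):
   for SIZE == 10000 the adjustments are

	sub	$8224, %rsp	(2*4096 + 32)		probe
	sub	$4096, %rsp				probe
	sub	$1808, %rsp	(10000 + 4096 - 12288)	probe
	add	$4128, %rsp	(4096 + 32)

   for a net adjustment of exactly SIZE bytes.  */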
9590
9591 /* Adjust the stack pointer up to REG while probing it. */
9592
9593 const char *
9594 output_adjust_stack_and_probe (rtx reg)
9595 {
9596 static int labelno = 0;
9597 char loop_lab[32], end_lab[32];
9598 rtx xops[2];
9599
9600 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9601 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9602
9603 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9604
9605 /* Jump to END_LAB if SP == LAST_ADDR. */
9606 xops[0] = stack_pointer_rtx;
9607 xops[1] = reg;
9608 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9609 fputs ("\tje\t", asm_out_file);
9610 assemble_name_raw (asm_out_file, end_lab);
9611 fputc ('\n', asm_out_file);
9612
9613 /* SP = SP + PROBE_INTERVAL. */
9614 xops[1] = GEN_INT (PROBE_INTERVAL);
9615 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9616
9617 /* Probe at SP. */
9618 xops[1] = const0_rtx;
9619 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9620
9621 fprintf (asm_out_file, "\tjmp\t");
9622 assemble_name_raw (asm_out_file, loop_lab);
9623 fputc ('\n', asm_out_file);
9624
9625 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9626
9627 return "";
9628 }
9629
9630 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9631 inclusive. These are offsets from the current stack pointer. */
9632
9633 static void
9634 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9635 {
9636 /* See if we have a constant small number of probes to generate. If so,
9637 that's the easy case. The run-time loop is made up of 7 insns in the
9638 generic case while the compile-time loop is made up of n insns for n #
9639 of intervals. */
9640 if (size <= 7 * PROBE_INTERVAL)
9641 {
9642 HOST_WIDE_INT i;
9643
9644 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9645 it exceeds SIZE. If only one probe is needed, this will not
9646 generate any code. Then probe at FIRST + SIZE. */
9647 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9648 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9649
9650 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9651 }
9652
9653 /* Otherwise, do the same as above, but in a loop. Note that we must be
9654 extra careful with variables wrapping around because we might be at
9655 the very top (or the very bottom) of the address space and we have
9656 to be able to handle this case properly; in particular, we use an
9657 equality test for the loop condition. */
9658 else
9659 {
9660 HOST_WIDE_INT rounded_size, last;
9661 struct scratch_reg sr;
9662
9663 get_scratch_register_on_entry (&sr);
9664
9665
9666 /* Step 1: round SIZE to the previous multiple of the interval. */
9667
9668 rounded_size = size & -PROBE_INTERVAL;
9669
9670
9671 /* Step 2: compute initial and final value of the loop counter. */
9672
9673 /* TEST_OFFSET = FIRST. */
9674 emit_move_insn (sr.reg, GEN_INT (-first));
9675
9676 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9677 last = first + rounded_size;
9678
9679
9680 /* Step 3: the loop
9681
9682 while (TEST_ADDR != LAST_ADDR)
9683 {
9684 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9685 probe at TEST_ADDR
9686 }
9687
9688 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9689 until it is equal to ROUNDED_SIZE. */
9690
9691 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9692
9693
9694 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9695 that SIZE is equal to ROUNDED_SIZE. */
9696
9697 if (size != rounded_size)
9698 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9699 stack_pointer_rtx,
9700 sr.reg),
9701 rounded_size - size));
9702
9703 release_scratch_register_on_entry (&sr);
9704 }
9705
9706 /* Make sure nothing is scheduled before we are done. */
9707 emit_insn (gen_blockage ());
9708 }
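/* Likewise for the constant path above, assuming PROBE_INTERVAL == 4096 and
   example arguments FIRST == 4096, SIZE == 10000: the probes land at
   sp - 8192, sp - 12288 and sp - 14096, and the stack pointer itself is left
   untouched.  */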
9709
9710 /* Probe a range of stack addresses from REG to END, inclusive. These are
9711 offsets from the current stack pointer. */
9712
9713 const char *
9714 output_probe_stack_range (rtx reg, rtx end)
9715 {
9716 static int labelno = 0;
9717 char loop_lab[32], end_lab[32];
9718 rtx xops[3];
9719
9720 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9721 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9722
9723 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9724
9725 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9726 xops[0] = reg;
9727 xops[1] = end;
9728 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9729 fputs ("\tje\t", asm_out_file);
9730 assemble_name_raw (asm_out_file, end_lab);
9731 fputc ('\n', asm_out_file);
9732
9733 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9734 xops[1] = GEN_INT (PROBE_INTERVAL);
9735 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9736
9737 /* Probe at TEST_ADDR. */
9738 xops[0] = stack_pointer_rtx;
9739 xops[1] = reg;
9740 xops[2] = const0_rtx;
9741 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9742
9743 fprintf (asm_out_file, "\tjmp\t");
9744 assemble_name_raw (asm_out_file, loop_lab);
9745 fputc ('\n', asm_out_file);
9746
9747 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9748
9749 return "";
9750 }
9751
9752 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9753 to be generated in correct form. */
9754 static void
9755 ix86_finalize_stack_realign_flags (void)
9756 {
9757 /* Check if stack realignment is really needed after reload, and
9758 store the result in cfun. */
9759 unsigned int incoming_stack_boundary
9760 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9761 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9762 unsigned int stack_realign = (incoming_stack_boundary
9763 < (current_function_is_leaf
9764 ? crtl->max_used_stack_slot_alignment
9765 : crtl->stack_alignment_needed));
9766
9767 if (crtl->stack_realign_finalized)
9768 {
9769 /* After stack_realign_needed is finalized, we can no longer
9770 change it. */
9771 gcc_assert (crtl->stack_realign_needed == stack_realign);
9772 }
9773 else
9774 {
9775 crtl->stack_realign_needed = stack_realign;
9776 crtl->stack_realign_finalized = true;
9777 }
9778 }
9779
9780 /* Expand the prologue into a bunch of separate insns. */
9781
9782 void
9783 ix86_expand_prologue (void)
9784 {
9785 struct machine_function *m = cfun->machine;
9786 rtx insn, t;
9787 bool pic_reg_used;
9788 struct ix86_frame frame;
9789 HOST_WIDE_INT allocate;
9790 bool int_registers_saved;
9791
9792 ix86_finalize_stack_realign_flags ();
9793
9794 /* DRAP should not coexist with stack_realign_fp */
9795 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9796
9797 memset (&m->fs, 0, sizeof (m->fs));
9798
9799 /* Initialize CFA state for before the prologue. */
9800 m->fs.cfa_reg = stack_pointer_rtx;
9801 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9802
9803 /* Track SP offset to the CFA. We continue tracking this after we've
9804 swapped the CFA register away from SP. In the case of re-alignment
9805 this is fudged; we're interested in offsets within the local frame. */
9806 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9807 m->fs.sp_valid = true;
9808
9809 ix86_compute_frame_layout (&frame);
9810
9811 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9812 {
9813 /* We should have already generated an error for any use of
9814 ms_hook on a nested function. */
9815 gcc_checking_assert (!ix86_static_chain_on_stack);
9816
9817 /* Check if profiling is active and we shall use the profiling-before-
9818 prologue variant. If so, issue a sorry. */
9819 if (crtl->profile && flag_fentry != 0)
9820 sorry ("ms_hook_prologue attribute isn%'t compatible "
9821 "with -mfentry for 32-bit");
9822
9823 /* In ix86_asm_output_function_label we emitted:
9824 8b ff movl.s %edi,%edi
9825 55 push %ebp
9826 8b ec movl.s %esp,%ebp
9827
9828 This matches the hookable function prologue in Win32 API
9829 functions in Microsoft Windows XP Service Pack 2 and newer.
9830 Wine uses this to enable Windows apps to hook the Win32 API
9831 functions provided by Wine.
9832
9833 What that means is that we've already set up the frame pointer. */
9834
9835 if (frame_pointer_needed
9836 && !(crtl->drap_reg && crtl->stack_realign_needed))
9837 {
9838 rtx push, mov;
9839
9840 /* We've decided to use the frame pointer already set up.
9841 Describe this to the unwinder by pretending that both
9842 push and mov insns happen right here.
9843
9844 Putting the unwind info here at the end of the ms_hook
9845 is done so that we can make absolutely certain we get
9846 the required byte sequence at the start of the function,
9847 rather than relying on an assembler that can produce
9848 the exact encoding required.
9849
9850 However it does mean (in the unpatched case) that we have
9851 a 1 insn window where the asynchronous unwind info is
9852 incorrect. However, if we placed the unwind info at
9853 its correct location we would have incorrect unwind info
9854 in the patched case. Which is probably all moot since
9855 I don't expect Wine generates dwarf2 unwind info for the
9856 system libraries that use this feature. */
9857
9858 insn = emit_insn (gen_blockage ());
9859
9860 push = gen_push (hard_frame_pointer_rtx);
9861 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9862 stack_pointer_rtx);
9863 RTX_FRAME_RELATED_P (push) = 1;
9864 RTX_FRAME_RELATED_P (mov) = 1;
9865
9866 RTX_FRAME_RELATED_P (insn) = 1;
9867 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9868 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9869
9870 /* Note that gen_push incremented m->fs.cfa_offset, even
9871 though we didn't emit the push insn here. */
9872 m->fs.cfa_reg = hard_frame_pointer_rtx;
9873 m->fs.fp_offset = m->fs.cfa_offset;
9874 m->fs.fp_valid = true;
9875 }
9876 else
9877 {
9878 /* The frame pointer is not needed so pop %ebp again.
9879 This leaves us with a pristine state. */
9880 emit_insn (gen_pop (hard_frame_pointer_rtx));
9881 }
9882 }
9883
9884 /* The first insn of a function that accepts its static chain on the
9885 stack is to push the register that would be filled in by a direct
9886 call. This insn will be skipped by the trampoline. */
9887 else if (ix86_static_chain_on_stack)
9888 {
9889 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9890 emit_insn (gen_blockage ());
9891
9892 /* We don't want to interpret this push insn as a register save,
9893 only as a stack adjustment. The real copy of the register as
9894 a save will be done later, if needed. */
9895 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9896 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9897 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9898 RTX_FRAME_RELATED_P (insn) = 1;
9899 }
9900
9901 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9902 DRAP is needed and stack realignment is really needed after reload. */
9903 if (stack_realign_drap)
9904 {
9905 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9906
9907 /* Only need to push the parameter pointer reg if it is call-preserved. */
9908 if (!call_used_regs[REGNO (crtl->drap_reg)])
9909 {
9910 /* Push arg pointer reg */
9911 insn = emit_insn (gen_push (crtl->drap_reg));
9912 RTX_FRAME_RELATED_P (insn) = 1;
9913 }
9914
9915 /* Grab the argument pointer. */
9916 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9917 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9918 RTX_FRAME_RELATED_P (insn) = 1;
9919 m->fs.cfa_reg = crtl->drap_reg;
9920 m->fs.cfa_offset = 0;
9921
9922 /* Align the stack. */
9923 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9924 stack_pointer_rtx,
9925 GEN_INT (-align_bytes)));
9926 RTX_FRAME_RELATED_P (insn) = 1;
9927
9928 /* Replicate the return address on the stack so that return
9929 address can be reached via (argp - 1) slot. This is needed
9930 to implement macro RETURN_ADDR_RTX and intrinsic function
9931 expand_builtin_return_addr etc. */
9932 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9933 t = gen_frame_mem (Pmode, t);
9934 insn = emit_insn (gen_push (t));
9935 RTX_FRAME_RELATED_P (insn) = 1;
9936
9937 /* For the purposes of frame and register save area addressing,
9938 we've started over with a new frame. */
9939 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9940 m->fs.realigned = true;
9941 }
9942
9943 if (frame_pointer_needed && !m->fs.fp_valid)
9944 {
9945 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9946 slower on all targets. Also sdb doesn't like it. */
9947 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9948 RTX_FRAME_RELATED_P (insn) = 1;
9949
9950 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
9951 {
9952 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9953 RTX_FRAME_RELATED_P (insn) = 1;
9954
9955 if (m->fs.cfa_reg == stack_pointer_rtx)
9956 m->fs.cfa_reg = hard_frame_pointer_rtx;
9957 m->fs.fp_offset = m->fs.sp_offset;
9958 m->fs.fp_valid = true;
9959 }
9960 }
9961
9962 int_registers_saved = (frame.nregs == 0);
9963
9964 if (!int_registers_saved)
9965 {
9966 /* If saving registers via PUSH, do so now. */
9967 if (!frame.save_regs_using_mov)
9968 {
9969 ix86_emit_save_regs ();
9970 int_registers_saved = true;
9971 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9972 }
9973
9974 /* When using the red zone we may start register saving before allocating
9975 the stack frame, saving one cycle of the prologue. However, avoid
9976 doing this if we have to probe the stack; at least on x86_64 the
9977 stack probe can turn into a call that clobbers a red zone location. */
9978 else if (ix86_using_red_zone ()
9979 && (! TARGET_STACK_PROBE
9980 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9981 {
9982 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9983 int_registers_saved = true;
9984 }
9985 }
9986
9987 if (stack_realign_fp)
9988 {
9989 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9990 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9991
9992 /* The computation of the size of the re-aligned stack frame means
9993 that we must allocate the size of the register save area before
9994 performing the actual alignment. Otherwise we cannot guarantee
9995 that there's enough storage above the realignment point. */
9996 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9997 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9998 GEN_INT (m->fs.sp_offset
9999 - frame.sse_reg_save_offset),
10000 -1, false);
10001
10002 /* Align the stack. */
10003 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10004 stack_pointer_rtx,
10005 GEN_INT (-align_bytes)));
10006
10007 /* For the purposes of register save area addressing, the stack
10008 pointer is no longer valid. As for the value of sp_offset,
10009 see ix86_compute_frame_layout, which we need to match in order
10010 to pass verification of stack_pointer_offset at the end. */
10011 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10012 m->fs.sp_valid = false;
10013 }
10014
10015 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10016
10017 if (flag_stack_usage_info)
10018 {
10019 /* We start to count from ARG_POINTER. */
10020 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10021
10022 /* If it was realigned, take into account the fake frame. */
10023 if (stack_realign_drap)
10024 {
10025 if (ix86_static_chain_on_stack)
10026 stack_size += UNITS_PER_WORD;
10027
10028 if (!call_used_regs[REGNO (crtl->drap_reg)])
10029 stack_size += UNITS_PER_WORD;
10030
10031 /* This over-estimates by 1 minimal-stack-alignment-unit but
10032 mitigates that by counting in the new return address slot. */
10033 current_function_dynamic_stack_size
10034 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10035 }
10036
10037 current_function_static_stack_size = stack_size;
10038 }
10039
10040 /* The stack has already been decremented by the instruction calling us,
10041 so probe if the size is non-negative to preserve the protection area. */
10042 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10043 {
10044 /* We expect the registers to be saved when probes are used. */
10045 gcc_assert (int_registers_saved);
10046
10047 if (STACK_CHECK_MOVING_SP)
10048 {
10049 ix86_adjust_stack_and_probe (allocate);
10050 allocate = 0;
10051 }
10052 else
10053 {
10054 HOST_WIDE_INT size = allocate;
10055
10056 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10057 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10058
10059 if (TARGET_STACK_PROBE)
10060 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10061 else
10062 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10063 }
10064 }
10065
10066 if (allocate == 0)
10067 ;
10068 else if (!ix86_target_stack_probe ()
10069 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10070 {
10071 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10072 GEN_INT (-allocate), -1,
10073 m->fs.cfa_reg == stack_pointer_rtx);
10074 }
10075 else
10076 {
10077 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10078 rtx r10 = NULL;
10079 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10080
10081 bool eax_live = false;
10082 bool r10_live = false;
10083
10084 if (TARGET_64BIT)
10085 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10086 if (!TARGET_64BIT_MS_ABI)
10087 eax_live = ix86_eax_live_at_start_p ();
10088
10089 if (eax_live)
10090 {
10091 emit_insn (gen_push (eax));
10092 allocate -= UNITS_PER_WORD;
10093 }
10094 if (r10_live)
10095 {
10096 r10 = gen_rtx_REG (Pmode, R10_REG);
10097 emit_insn (gen_push (r10));
10098 allocate -= UNITS_PER_WORD;
10099 }
10100
10101 emit_move_insn (eax, GEN_INT (allocate));
10102 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10103
10104 /* Use the fact that AX still contains ALLOCATE. */
10105 adjust_stack_insn = (TARGET_64BIT
10106 ? gen_pro_epilogue_adjust_stack_di_sub
10107 : gen_pro_epilogue_adjust_stack_si_sub);
10108
10109 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10110 stack_pointer_rtx, eax));
10111
10112 /* Note that SEH directives need to continue tracking the stack
10113 pointer even after the frame pointer has been set up. */
10114 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10115 {
10116 if (m->fs.cfa_reg == stack_pointer_rtx)
10117 m->fs.cfa_offset += allocate;
10118
10119 RTX_FRAME_RELATED_P (insn) = 1;
10120 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10121 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10122 plus_constant (stack_pointer_rtx,
10123 -allocate)));
10124 }
10125 m->fs.sp_offset += allocate;
10126
10127 if (r10_live && eax_live)
10128 {
10129 t = choose_baseaddr (m->fs.sp_offset - allocate);
10130 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10131 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10132 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10133 }
10134 else if (eax_live || r10_live)
10135 {
10136 t = choose_baseaddr (m->fs.sp_offset - allocate);
10137 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10138 }
10139 }
10140 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10141
10142 /* If we haven't already set up the frame pointer, do so now. */
10143 if (frame_pointer_needed && !m->fs.fp_valid)
10144 {
10145 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10146 GEN_INT (frame.stack_pointer_offset
10147 - frame.hard_frame_pointer_offset));
10148 insn = emit_insn (insn);
10149 RTX_FRAME_RELATED_P (insn) = 1;
10150 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10151
10152 if (m->fs.cfa_reg == stack_pointer_rtx)
10153 m->fs.cfa_reg = hard_frame_pointer_rtx;
10154 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10155 m->fs.fp_valid = true;
10156 }
10157
10158 if (!int_registers_saved)
10159 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10160 if (frame.nsseregs)
10161 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10162
10163 pic_reg_used = false;
10164 if (pic_offset_table_rtx
10165 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10166 || crtl->profile))
10167 {
10168 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10169
10170 if (alt_pic_reg_used != INVALID_REGNUM)
10171 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10172
10173 pic_reg_used = true;
10174 }
10175
10176 if (pic_reg_used)
10177 {
10178 if (TARGET_64BIT)
10179 {
10180 if (ix86_cmodel == CM_LARGE_PIC)
10181 {
10182 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10183 rtx label = gen_label_rtx ();
10184 emit_label (label);
10185 LABEL_PRESERVE_P (label) = 1;
10186 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10187 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10188 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10189 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10190 pic_offset_table_rtx, tmp_reg));
10191 }
10192 else
10193 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10194 }
10195 else
10196 {
10197 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10198 RTX_FRAME_RELATED_P (insn) = 1;
10199 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10200 }
10201 }
10202
10203 /* In the pic_reg_used case, make sure that the GOT load isn't deleted
10204 when mcount needs it. A blockage to avoid call movement across the mcount
10205 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10206 note. */
10207 if (crtl->profile && !flag_fentry && pic_reg_used)
10208 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10209
10210 if (crtl->drap_reg && !crtl->stack_realign_needed)
10211 {
10212 /* vDRAP is set up, but after reload it turns out stack realignment
10213 isn't necessary; here we emit prologue code to set up DRAP
10214 without the stack realignment adjustment. */
10215 t = choose_baseaddr (0);
10216 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10217 }
10218
10219 /* Prevent instructions from being scheduled into register save push
10220 sequence when access to the redzone area is done through frame pointer.
10221 The offset between the frame pointer and the stack pointer is calculated
10222 relative to the value of the stack pointer at the end of the function
10223 prologue, and moving instructions that access redzone area via frame
10224 pointer inside push sequence violates this assumption. */
10225 if (frame_pointer_needed && frame.red_zone_size)
10226 emit_insn (gen_memory_blockage ());
10227
10228 /* Emit cld instruction if stringops are used in the function. */
10229 if (TARGET_CLD && ix86_current_function_needs_cld)
10230 emit_insn (gen_cld ());
10231
10232 /* SEH requires that the prologue end within 256 bytes of the start of
10233 the function. Prevent instruction schedules that would extend that. */
10234 if (TARGET_SEH)
10235 emit_insn (gen_blockage ());
10236 }
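/* In outline, the prologue emitted above proceeds as follows: handle the
   ms_hook and static-chain-on-stack special entry sequences, set up and align
   the DRAP register when dynamic realignment is driven by it, push and set the
   frame pointer when needed, save the integer registers (by push or by mov),
   perform the stack_realign_fp realignment if required, allocate the remaining
   frame (probing it when stack checking is on), save the SSE registers, and
   finally materialize the PIC register and any blockages the target needs.  */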
10237
10238 /* Emit code to restore REG using a POP insn. */
10239
10240 static void
10241 ix86_emit_restore_reg_using_pop (rtx reg)
10242 {
10243 struct machine_function *m = cfun->machine;
10244 rtx insn = emit_insn (gen_pop (reg));
10245
10246 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10247 m->fs.sp_offset -= UNITS_PER_WORD;
10248
10249 if (m->fs.cfa_reg == crtl->drap_reg
10250 && REGNO (reg) == REGNO (crtl->drap_reg))
10251 {
10252 /* Previously we'd represented the CFA as an expression
10253 like *(%ebp - 8). We've just popped that value from
10254 the stack, which means we need to reset the CFA to
10255 the drap register. This will remain until we restore
10256 the stack pointer. */
10257 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10258 RTX_FRAME_RELATED_P (insn) = 1;
10259
10260 /* This means that the DRAP register is valid for addressing too. */
10261 m->fs.drap_valid = true;
10262 return;
10263 }
10264
10265 if (m->fs.cfa_reg == stack_pointer_rtx)
10266 {
10267 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10268 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10269 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10270 RTX_FRAME_RELATED_P (insn) = 1;
10271
10272 m->fs.cfa_offset -= UNITS_PER_WORD;
10273 }
10274
10275 /* When the frame pointer is the CFA, and we pop it, we are
10276 swapping back to the stack pointer as the CFA. This happens
10277 for stack frames that don't allocate other data, so we assume
10278 the stack pointer is now pointing at the return address, i.e.
10279 the function entry state, which makes the offset be 1 word. */
10280 if (reg == hard_frame_pointer_rtx)
10281 {
10282 m->fs.fp_valid = false;
10283 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10284 {
10285 m->fs.cfa_reg = stack_pointer_rtx;
10286 m->fs.cfa_offset -= UNITS_PER_WORD;
10287
10288 add_reg_note (insn, REG_CFA_DEF_CFA,
10289 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10290 GEN_INT (m->fs.cfa_offset)));
10291 RTX_FRAME_RELATED_P (insn) = 1;
10292 }
10293 }
10294 }
10295
10296 /* Emit code to restore saved registers using POP insns. */
10297
10298 static void
10299 ix86_emit_restore_regs_using_pop (void)
10300 {
10301 unsigned int regno;
10302
10303 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10304 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10305 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10306 }
10307
10308 /* Emit code and notes for the LEAVE instruction. */
10309
10310 static void
10311 ix86_emit_leave (void)
10312 {
10313 struct machine_function *m = cfun->machine;
10314 rtx insn = emit_insn (ix86_gen_leave ());
10315
10316 ix86_add_queued_cfa_restore_notes (insn);
10317
10318 gcc_assert (m->fs.fp_valid);
10319 m->fs.sp_valid = true;
10320 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10321 m->fs.fp_valid = false;
10322
10323 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10324 {
10325 m->fs.cfa_reg = stack_pointer_rtx;
10326 m->fs.cfa_offset = m->fs.sp_offset;
10327
10328 add_reg_note (insn, REG_CFA_DEF_CFA,
10329 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10330 RTX_FRAME_RELATED_P (insn) = 1;
10331 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10332 m->fs.fp_offset);
10333 }
10334 }
10335
10336 /* Emit code to restore saved registers using MOV insns.
10337 First register is restored from CFA - CFA_OFFSET. */
10338 static void
10339 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10340 bool maybe_eh_return)
10341 {
10342 struct machine_function *m = cfun->machine;
10343 unsigned int regno;
10344
10345 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10346 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10347 {
10348 rtx reg = gen_rtx_REG (Pmode, regno);
10349 rtx insn, mem;
10350
10351 mem = choose_baseaddr (cfa_offset);
10352 mem = gen_frame_mem (Pmode, mem);
10353 insn = emit_move_insn (reg, mem);
10354
10355 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10356 {
10357 /* Previously we'd represented the CFA as an expression
10358 like *(%ebp - 8). We've just restored that value from
10359 the stack, which means we need to reset the CFA to
10360 the drap register. This will remain until we restore
10361 the stack pointer. */
10362 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10363 RTX_FRAME_RELATED_P (insn) = 1;
10364
10365 /* This means that the DRAP register is valid for addressing. */
10366 m->fs.drap_valid = true;
10367 }
10368 else
10369 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10370
10371 cfa_offset -= UNITS_PER_WORD;
10372 }
10373 }
10374
10375 /* Emit code to restore saved registers using MOV insns.
10376 First register is restored from CFA - CFA_OFFSET. */
10377 static void
10378 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10379 bool maybe_eh_return)
10380 {
10381 unsigned int regno;
10382
10383 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10384 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10385 {
10386 rtx reg = gen_rtx_REG (V4SFmode, regno);
10387 rtx mem;
10388
10389 mem = choose_baseaddr (cfa_offset);
10390 mem = gen_rtx_MEM (V4SFmode, mem);
10391 set_mem_align (mem, 128);
10392 emit_move_insn (reg, mem);
10393
10394 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10395
10396 cfa_offset -= 16;
10397 }
10398 }
10399
10400 /* Restore function stack, frame, and registers. */
10401
10402 void
10403 ix86_expand_epilogue (int style)
10404 {
10405 struct machine_function *m = cfun->machine;
10406 struct machine_frame_state frame_state_save = m->fs;
10407 struct ix86_frame frame;
10408 bool restore_regs_via_mov;
10409 bool using_drap;
10410
10411 ix86_finalize_stack_realign_flags ();
10412 ix86_compute_frame_layout (&frame);
10413
10414 m->fs.sp_valid = (!frame_pointer_needed
10415 || (current_function_sp_is_unchanging
10416 && !stack_realign_fp));
10417 gcc_assert (!m->fs.sp_valid
10418 || m->fs.sp_offset == frame.stack_pointer_offset);
10419
10420 /* The FP must be valid if the frame pointer is present. */
10421 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10422 gcc_assert (!m->fs.fp_valid
10423 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10424
10425 /* We must have *some* valid pointer to the stack frame. */
10426 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10427
10428 /* The DRAP is never valid at this point. */
10429 gcc_assert (!m->fs.drap_valid);
10430
10431 /* See the comment about red zone and frame
10432 pointer usage in ix86_expand_prologue. */
10433 if (frame_pointer_needed && frame.red_zone_size)
10434 emit_insn (gen_memory_blockage ());
10435
10436 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10437 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10438
10439 /* Determine the CFA offset of the end of the red-zone. */
10440 m->fs.red_zone_offset = 0;
10441 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10442 {
10443 /* The red-zone begins below the return address. */
10444 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10445
10446 /* When the register save area is in the aligned portion of
10447 the stack, determine the maximum runtime displacement that
10448 matches up with the aligned frame. */
10449 if (stack_realign_drap)
10450 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10451 + UNITS_PER_WORD);
10452 }
10453
10454 /* Special care must be taken for the normal return case of a function
10455 using eh_return: the eax and edx registers are marked as saved, but
10456 not restored along this path. Adjust the save location to match. */
10457 if (crtl->calls_eh_return && style != 2)
10458 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10459
10460 /* EH_RETURN requires the use of moves to function properly. */
10461 if (crtl->calls_eh_return)
10462 restore_regs_via_mov = true;
10463 /* SEH requires the use of pops to identify the epilogue. */
10464 else if (TARGET_SEH)
10465 restore_regs_via_mov = false;
10466 /* If we're only restoring one register and sp is not valid then
10467 use a move instruction to restore the register since it's
10468 less work than reloading sp and popping the register. */
10469 else if (!m->fs.sp_valid && frame.nregs <= 1)
10470 restore_regs_via_mov = true;
10471 else if (TARGET_EPILOGUE_USING_MOVE
10472 && cfun->machine->use_fast_prologue_epilogue
10473 && (frame.nregs > 1
10474 || m->fs.sp_offset != frame.reg_save_offset))
10475 restore_regs_via_mov = true;
10476 else if (frame_pointer_needed
10477 && !frame.nregs
10478 && m->fs.sp_offset != frame.reg_save_offset)
10479 restore_regs_via_mov = true;
10480 else if (frame_pointer_needed
10481 && TARGET_USE_LEAVE
10482 && cfun->machine->use_fast_prologue_epilogue
10483 && frame.nregs == 1)
10484 restore_regs_via_mov = true;
10485 else
10486 restore_regs_via_mov = false;
10487
10488 if (restore_regs_via_mov || frame.nsseregs)
10489 {
10490 /* Ensure that the entire register save area is addressable via
10491 the stack pointer, if we will restore via sp. */
10492 if (TARGET_64BIT
10493 && m->fs.sp_offset > 0x7fffffff
10494 && !(m->fs.fp_valid || m->fs.drap_valid)
10495 && (frame.nsseregs + frame.nregs) != 0)
10496 {
10497 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10498 GEN_INT (m->fs.sp_offset
10499 - frame.sse_reg_save_offset),
10500 style,
10501 m->fs.cfa_reg == stack_pointer_rtx);
10502 }
10503 }
10504
10505 /* If there are any SSE registers to restore, then we have to do it
10506 via moves, since there's obviously no pop for SSE regs. */
10507 if (frame.nsseregs)
10508 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10509 style == 2);
10510
10511 if (restore_regs_via_mov)
10512 {
10513 rtx t;
10514
10515 if (frame.nregs)
10516 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10517
10518 /* eh_return epilogues need %ecx added to the stack pointer. */
10519 if (style == 2)
10520 {
10521 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10522
10523 /* Stack align doesn't work with eh_return. */
10524 gcc_assert (!stack_realign_drap);
10525 /* Neither do regparm nested functions. */
10526 gcc_assert (!ix86_static_chain_on_stack);
10527
10528 if (frame_pointer_needed)
10529 {
10530 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10531 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10532 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10533
10534 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10535 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10536
10537 /* Note that we use SA as a temporary CFA, as the return
10538 address is at the proper place relative to it. We
10539 pretend this happens at the FP restore insn because
10540 prior to this insn the FP would be stored at the wrong
10541 offset relative to SA, and after this insn we have no
10542 other reasonable register to use for the CFA. We don't
10543 bother resetting the CFA to the SP for the duration of
10544 the return insn. */
10545 add_reg_note (insn, REG_CFA_DEF_CFA,
10546 plus_constant (sa, UNITS_PER_WORD));
10547 ix86_add_queued_cfa_restore_notes (insn);
10548 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10549 RTX_FRAME_RELATED_P (insn) = 1;
10550
10551 m->fs.cfa_reg = sa;
10552 m->fs.cfa_offset = UNITS_PER_WORD;
10553 m->fs.fp_valid = false;
10554
10555 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10556 const0_rtx, style, false);
10557 }
10558 else
10559 {
10560 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10561 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10562 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10563 ix86_add_queued_cfa_restore_notes (insn);
10564
10565 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10566 if (m->fs.cfa_offset != UNITS_PER_WORD)
10567 {
10568 m->fs.cfa_offset = UNITS_PER_WORD;
10569 add_reg_note (insn, REG_CFA_DEF_CFA,
10570 plus_constant (stack_pointer_rtx,
10571 UNITS_PER_WORD));
10572 RTX_FRAME_RELATED_P (insn) = 1;
10573 }
10574 }
10575 m->fs.sp_offset = UNITS_PER_WORD;
10576 m->fs.sp_valid = true;
10577 }
10578 }
10579 else
10580 {
10581 /* SEH requires that the function end with (1) a stack adjustment
10582 if necessary, (2) a sequence of pops, and (3) a return or
10583 jump instruction. Prevent insns from the function body from
10584 being scheduled into this sequence. */
10585 if (TARGET_SEH)
10586 {
10587 /* Prevent a catch region from being adjacent to the standard
10588 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
10589 several other flags that would be interesting to test are
10590 yet set up. */
10591 if (flag_non_call_exceptions)
10592 emit_insn (gen_nops (const1_rtx));
10593 else
10594 emit_insn (gen_blockage ());
10595 }
10596
10597 /* First step is to deallocate the stack frame so that we can
10598 pop the registers. */
10599 if (!m->fs.sp_valid)
10600 {
10601 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10602 GEN_INT (m->fs.fp_offset
10603 - frame.reg_save_offset),
10604 style, false);
10605 }
10606 else if (m->fs.sp_offset != frame.reg_save_offset)
10607 {
10608 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10609 GEN_INT (m->fs.sp_offset
10610 - frame.reg_save_offset),
10611 style,
10612 m->fs.cfa_reg == stack_pointer_rtx);
10613 }
10614
10615 ix86_emit_restore_regs_using_pop ();
10616 }
10617
10618 /* If we used a frame pointer and haven't already got rid of it,
10619 then do so now. */
10620 if (m->fs.fp_valid)
10621 {
10622 /* If the stack pointer is valid and pointing at the frame
10623 pointer store address, then we only need a pop. */
10624 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10625 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10626 /* Leave results in shorter dependency chains on CPUs that are
10627 able to grok it fast. */
10628 else if (TARGET_USE_LEAVE
10629 || optimize_function_for_size_p (cfun)
10630 || !cfun->machine->use_fast_prologue_epilogue)
10631 ix86_emit_leave ();
10632 else
10633 {
10634 pro_epilogue_adjust_stack (stack_pointer_rtx,
10635 hard_frame_pointer_rtx,
10636 const0_rtx, style, !using_drap);
10637 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10638 }
10639 }
10640
10641 if (using_drap)
10642 {
10643 int param_ptr_offset = UNITS_PER_WORD;
10644 rtx insn;
10645
10646 gcc_assert (stack_realign_drap);
10647
10648 if (ix86_static_chain_on_stack)
10649 param_ptr_offset += UNITS_PER_WORD;
10650 if (!call_used_regs[REGNO (crtl->drap_reg)])
10651 param_ptr_offset += UNITS_PER_WORD;
10652
10653 insn = emit_insn (gen_rtx_SET
10654 (VOIDmode, stack_pointer_rtx,
10655 gen_rtx_PLUS (Pmode,
10656 crtl->drap_reg,
10657 GEN_INT (-param_ptr_offset))));
10658 m->fs.cfa_reg = stack_pointer_rtx;
10659 m->fs.cfa_offset = param_ptr_offset;
10660 m->fs.sp_offset = param_ptr_offset;
10661 m->fs.realigned = false;
10662
10663 add_reg_note (insn, REG_CFA_DEF_CFA,
10664 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10665 GEN_INT (param_ptr_offset)));
10666 RTX_FRAME_RELATED_P (insn) = 1;
10667
10668 if (!call_used_regs[REGNO (crtl->drap_reg)])
10669 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10670 }
10671
10672 /* At this point the stack pointer must be valid, and we must have
10673 restored all of the registers. We may not have deallocated the
10674 entire stack frame. We've delayed this until now because it may
10675 be possible to merge the local stack deallocation with the
10676 deallocation forced by ix86_static_chain_on_stack. */
10677 gcc_assert (m->fs.sp_valid);
10678 gcc_assert (!m->fs.fp_valid);
10679 gcc_assert (!m->fs.realigned);
10680 if (m->fs.sp_offset != UNITS_PER_WORD)
10681 {
10682 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10683 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10684 style, true);
10685 }
10686
10687 /* Sibcall epilogues don't want a return instruction. */
10688 if (style == 0)
10689 {
10690 m->fs = frame_state_save;
10691 return;
10692 }
10693
10694 /* Emit vzeroupper if needed. */
10695 if (TARGET_VZEROUPPER
10696 && !TREE_THIS_VOLATILE (cfun->decl)
10697 && !cfun->machine->caller_return_avx256_p)
10698 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10699
10700 if (crtl->args.pops_args && crtl->args.size)
10701 {
10702 rtx popc = GEN_INT (crtl->args.pops_args);
10703
10704 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10705 address, do explicit add, and jump indirectly to the caller. */
10706
10707 if (crtl->args.pops_args >= 65536)
10708 {
10709 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10710 rtx insn;
10711
10712 /* There is no "pascal" calling convention in any 64bit ABI. */
10713 gcc_assert (!TARGET_64BIT);
10714
10715 insn = emit_insn (gen_pop (ecx));
10716 m->fs.cfa_offset -= UNITS_PER_WORD;
10717 m->fs.sp_offset -= UNITS_PER_WORD;
10718
10719 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10720 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10721 add_reg_note (insn, REG_CFA_REGISTER,
10722 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10723 RTX_FRAME_RELATED_P (insn) = 1;
10724
10725 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10726 popc, -1, true);
10727 emit_jump_insn (gen_return_indirect_internal (ecx));
10728 }
10729 else
10730 emit_jump_insn (gen_return_pop_internal (popc));
10731 }
10732 else
10733 emit_jump_insn (gen_return_internal ());
10734
10735 /* Restore the state back to the state from the prologue,
10736 so that it's correct for the next epilogue. */
10737 m->fs = frame_state_save;
10738 }
10739
10740 /* Reset from the function's potential modifications. */
10741
10742 static void
10743 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10744 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10745 {
10746 if (pic_offset_table_rtx)
10747 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10748 #if TARGET_MACHO
10749 /* Mach-O doesn't support labels at the end of objects, so if
10750 it looks like we might want one, insert a NOP. */
10751 {
10752 rtx insn = get_last_insn ();
10753 while (insn
10754 && NOTE_P (insn)
10755 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10756 insn = PREV_INSN (insn);
10757 if (insn
10758 && (LABEL_P (insn)
10759 || (NOTE_P (insn)
10760 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10761 fputs ("\tnop\n", file);
10762 }
10763 #endif
10764
10765 }
10766
10767 /* Return a scratch register to use in the split stack prologue. The
10768 split stack prologue is used for -fsplit-stack. It is the first
10769 instructions in the function, even before the regular prologue.
10770 The scratch register can be any caller-saved register which is not
10771 used for parameters or for the static chain. */
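/* For instance, on 32-bit a plain function gets %ecx, a non-nested
   fastcall function gets %eax, and a nested function with at most one
   register parameter gets %edx; 64-bit functions always use %r11.  */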
10772
10773 static unsigned int
10774 split_stack_prologue_scratch_regno (void)
10775 {
10776 if (TARGET_64BIT)
10777 return R11_REG;
10778 else
10779 {
10780 bool is_fastcall;
10781 int regparm;
10782
10783 is_fastcall = (lookup_attribute ("fastcall",
10784 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10785 != NULL);
10786 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10787
10788 if (is_fastcall)
10789 {
10790 if (DECL_STATIC_CHAIN (cfun->decl))
10791 {
10792 sorry ("-fsplit-stack does not support fastcall with "
10793 "nested function");
10794 return INVALID_REGNUM;
10795 }
10796 return AX_REG;
10797 }
10798 else if (regparm < 3)
10799 {
10800 if (!DECL_STATIC_CHAIN (cfun->decl))
10801 return CX_REG;
10802 else
10803 {
10804 if (regparm >= 2)
10805 {
10806 sorry ("-fsplit-stack does not support 2 register "
10807 " parameters for a nested function");
10808 return INVALID_REGNUM;
10809 }
10810 return DX_REG;
10811 }
10812 }
10813 else
10814 {
10815 /* FIXME: We could make this work by pushing a register
10816 around the addition and comparison. */
10817 sorry ("-fsplit-stack does not support 3 register parameters");
10818 return INVALID_REGNUM;
10819 }
10820 }
10821 }
10822
10823 /* A SYMBOL_REF for the function which allocates new stackspace for
10824 -fsplit-stack. */
10825
10826 static GTY(()) rtx split_stack_fn;
10827
10828 /* A SYMBOL_REF for the more stack function when using the large
10829 model. */
10830
10831 static GTY(()) rtx split_stack_fn_large;
10832
10833 /* Handle -fsplit-stack. These are the first instructions in the
10834 function, even before the regular prologue. */
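/* In outline: compare the stack pointer minus the frame size against the
   stack boundary kept in the TCB; if there is not enough room, call
   __morestack to allocate more stack, then continue at the label emitted
   below.  */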
10835
10836 void
10837 ix86_expand_split_stack_prologue (void)
10838 {
10839 struct ix86_frame frame;
10840 HOST_WIDE_INT allocate;
10841 unsigned HOST_WIDE_INT args_size;
10842 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10843 rtx scratch_reg = NULL_RTX;
10844 rtx varargs_label = NULL_RTX;
10845 rtx fn;
10846
10847 gcc_assert (flag_split_stack && reload_completed);
10848
10849 ix86_finalize_stack_realign_flags ();
10850 ix86_compute_frame_layout (&frame);
10851 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10852
10853 /* This is the label we will branch to if we have enough stack
10854 space. We expect the basic block reordering pass to reverse this
10855 branch if optimizing, so that we branch in the unlikely case. */
10856 label = gen_label_rtx ();
10857
10858 /* We need to compare the stack pointer minus the frame size with
10859 the stack boundary in the TCB. The stack boundary always gives
10860 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10861 can compare directly. Otherwise we need to do an addition. */
10862
10863 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10864 UNSPEC_STACK_CHECK);
10865 limit = gen_rtx_CONST (Pmode, limit);
10866 limit = gen_rtx_MEM (Pmode, limit);
10867 if (allocate < SPLIT_STACK_AVAILABLE)
10868 current = stack_pointer_rtx;
10869 else
10870 {
10871 unsigned int scratch_regno;
10872 rtx offset;
10873
10874 /* We need a scratch register to hold the stack pointer minus
10875 the required frame size. Since this is the very start of the
10876 function, the scratch register can be any caller-saved
10877 register which is not used for parameters. */
10878 offset = GEN_INT (- allocate);
10879 scratch_regno = split_stack_prologue_scratch_regno ();
10880 if (scratch_regno == INVALID_REGNUM)
10881 return;
10882 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10883 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10884 {
10885 /* We don't use ix86_gen_add3 in this case because it will
10886 want to split to lea, but when not optimizing the insn
10887 will not be split after this point. */
10888 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10889 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10890 offset)));
10891 }
10892 else
10893 {
10894 emit_move_insn (scratch_reg, offset);
10895 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10896 stack_pointer_rtx));
10897 }
10898 current = scratch_reg;
10899 }
10900
10901 ix86_expand_branch (GEU, current, limit, label);
10902 jump_insn = get_last_insn ();
10903 JUMP_LABEL (jump_insn) = label;
10904
10905 /* Mark the jump as very likely to be taken. */
10906 add_reg_note (jump_insn, REG_BR_PROB,
10907 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
10908
10909 if (split_stack_fn == NULL_RTX)
10910 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10911 fn = split_stack_fn;
10912
10913 /* Get more stack space. We pass in the desired stack space and the
10914 size of the arguments to copy to the new stack. In 32-bit mode
10915 we push the parameters; __morestack will return on a new stack
10916 anyhow. In 64-bit mode we pass the parameters in r10 and
10917 r11. */
10918 allocate_rtx = GEN_INT (allocate);
10919 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10920 call_fusage = NULL_RTX;
10921 if (TARGET_64BIT)
10922 {
10923 rtx reg10, reg11;
10924
10925 reg10 = gen_rtx_REG (Pmode, R10_REG);
10926 reg11 = gen_rtx_REG (Pmode, R11_REG);
10927
10928 /* If this function uses a static chain, it will be in %r10.
10929 Preserve it across the call to __morestack. */
10930 if (DECL_STATIC_CHAIN (cfun->decl))
10931 {
10932 rtx rax;
10933
10934 rax = gen_rtx_REG (Pmode, AX_REG);
10935 emit_move_insn (rax, reg10);
10936 use_reg (&call_fusage, rax);
10937 }
10938
10939 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10940 {
10941 HOST_WIDE_INT argval;
10942
10943 /* When using the large model we need to load the address
10944 into a register, and we've run out of registers. So we
10945 switch to a different calling convention, and we call a
10946 different function: __morestack_large. We pass the
10947 argument size in the upper 32 bits of r10 and pass the
10948 frame size in the lower 32 bits. */
10949 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
10950 gcc_assert ((args_size & 0xffffffff) == args_size);
10951
10952 if (split_stack_fn_large == NULL_RTX)
10953 split_stack_fn_large =
10954 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10955
10956 if (ix86_cmodel == CM_LARGE_PIC)
10957 {
10958 rtx label, x;
10959
10960 label = gen_label_rtx ();
10961 emit_label (label);
10962 LABEL_PRESERVE_P (label) = 1;
10963 emit_insn (gen_set_rip_rex64 (reg10, label));
10964 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10965 emit_insn (gen_adddi3 (reg10, reg10, reg11));
10966 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
10967 UNSPEC_GOT);
10968 x = gen_rtx_CONST (Pmode, x);
10969 emit_move_insn (reg11, x);
10970 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10971 x = gen_const_mem (Pmode, x);
10972 emit_move_insn (reg11, x);
10973 }
10974 else
10975 emit_move_insn (reg11, split_stack_fn_large);
10976
10977 fn = reg11;
10978
10979 argval = ((args_size << 16) << 16) + allocate;
10980 emit_move_insn (reg10, GEN_INT (argval));
10981 }
10982 else
10983 {
10984 emit_move_insn (reg10, allocate_rtx);
10985 emit_move_insn (reg11, GEN_INT (args_size));
10986 use_reg (&call_fusage, reg11);
10987 }
10988
10989 use_reg (&call_fusage, reg10);
10990 }
10991 else
10992 {
10993 emit_insn (gen_push (GEN_INT (args_size)));
10994 emit_insn (gen_push (allocate_rtx));
10995 }
10996 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
10997 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10998 NULL_RTX, false);
10999 add_function_usage_to (call_insn, call_fusage);
11000
11001 /* In order to make call/return prediction work right, we now need
11002 to execute a return instruction. See
11003 libgcc/config/i386/morestack.S for the details on how this works.
11004
11005 For flow purposes gcc must not see this as a return
11006 instruction--we need control flow to continue at the subsequent
11007 label. Therefore, we use an unspec. */
11008 gcc_assert (crtl->args.pops_args < 65536);
11009 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11010
11011 /* If we are in 64-bit mode and this function uses a static chain,
11012 we saved %r10 in %rax before calling __morestack. */
11013 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11014 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11015 gen_rtx_REG (Pmode, AX_REG));
11016
11017 /* If this function calls va_start, we need to store a pointer to
11018 the arguments on the old stack, because they may not have been
11019 all copied to the new stack. At this point the old stack can be
11020 found at the frame pointer value used by __morestack, because
11021 __morestack has set that up before calling back to us. Here we
11022 store that pointer in a scratch register, and in
11023 ix86_expand_prologue we store the scratch register in a stack
11024 slot. */
11025 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11026 {
11027 unsigned int scratch_regno;
11028 rtx frame_reg;
11029 int words;
11030
11031 scratch_regno = split_stack_prologue_scratch_regno ();
11032 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11033 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11034
11035 /* 64-bit:
11036 fp -> old fp value
11037 return address within this function
11038 return address of caller of this function
11039 stack arguments
11040 So we add three words to get to the stack arguments.
11041
11042 32-bit:
11043 fp -> old fp value
11044 return address within this function
11045 first argument to __morestack
11046 second argument to __morestack
11047 return address of caller of this function
11048 stack arguments
11049 So we add five words to get to the stack arguments.
11050 */
11051 words = TARGET_64BIT ? 3 : 5;
11052 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11053 gen_rtx_PLUS (Pmode, frame_reg,
11054 GEN_INT (words * UNITS_PER_WORD))));
11055
11056 varargs_label = gen_label_rtx ();
11057 emit_jump_insn (gen_jump (varargs_label));
11058 JUMP_LABEL (get_last_insn ()) = varargs_label;
11059
11060 emit_barrier ();
11061 }
11062
11063 emit_label (label);
11064 LABEL_NUSES (label) = 1;
11065
11066 /* If this function calls va_start, we now have to set the scratch
11067 register for the case where we do not call __morestack. In this
11068 case we need to set it based on the stack pointer. */
11069 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11070 {
11071 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11072 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11073 GEN_INT (UNITS_PER_WORD))));
11074
11075 emit_label (varargs_label);
11076 LABEL_NUSES (varargs_label) = 1;
11077 }
11078 }
11079
11080 /* We may have to tell the dataflow pass that the split stack prologue
11081 is initializing a scratch register. */
11082
11083 static void
11084 ix86_live_on_entry (bitmap regs)
11085 {
11086 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11087 {
11088 gcc_assert (flag_split_stack);
11089 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11090 }
11091 }
11092 \f
11093 /* Extract the parts of an RTL expression that is a valid memory address
11094 for an instruction. Return 0 if the structure of the address is
11095 grossly off. Return -1 if the address contains ASHIFT, so it is not
11096 strictly valid, but is still used for computing the length of an lea instruction. */
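/* For instance, the address 12(%ebx,%ecx,4), i.e.
   (plus (plus (mult (reg %ecx) (const_int 4)) (reg %ebx)) (const_int 12)),
   decomposes into base = %ebx, index = %ecx, scale = 4, disp = 12 and
   seg = SEG_DEFAULT.  */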
11097
11098 int
11099 ix86_decompose_address (rtx addr, struct ix86_address *out)
11100 {
11101 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11102 rtx base_reg, index_reg;
11103 HOST_WIDE_INT scale = 1;
11104 rtx scale_rtx = NULL_RTX;
11105 rtx tmp;
11106 int retval = 1;
11107 enum ix86_address_seg seg = SEG_DEFAULT;
11108
11109 if (REG_P (addr))
11110 base = addr;
11111 else if (GET_CODE (addr) == SUBREG)
11112 {
11113 /* Allow only subregs of DImode hard regs. */
11114 if (register_no_elim_operand (SUBREG_REG (addr), DImode))
11115 base = addr;
11116 else
11117 return 0;
11118 }
11119 else if (GET_CODE (addr) == PLUS)
11120 {
11121 rtx addends[4], op;
11122 int n = 0, i;
11123
11124 op = addr;
11125 do
11126 {
11127 if (n >= 4)
11128 return 0;
11129 addends[n++] = XEXP (op, 1);
11130 op = XEXP (op, 0);
11131 }
11132 while (GET_CODE (op) == PLUS);
11133 if (n >= 4)
11134 return 0;
11135 addends[n] = op;
11136
11137 for (i = n; i >= 0; --i)
11138 {
11139 op = addends[i];
11140 switch (GET_CODE (op))
11141 {
11142 case MULT:
11143 if (index)
11144 return 0;
11145 index = XEXP (op, 0);
11146 scale_rtx = XEXP (op, 1);
11147 break;
11148
11149 case ASHIFT:
11150 if (index)
11151 return 0;
11152 index = XEXP (op, 0);
11153 tmp = XEXP (op, 1);
11154 if (!CONST_INT_P (tmp))
11155 return 0;
11156 scale = INTVAL (tmp);
11157 if ((unsigned HOST_WIDE_INT) scale > 3)
11158 return 0;
11159 scale = 1 << scale;
11160 break;
11161
11162 case UNSPEC:
11163 if (XINT (op, 1) == UNSPEC_TP
11164 && TARGET_TLS_DIRECT_SEG_REFS
11165 && seg == SEG_DEFAULT)
11166 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11167 else
11168 return 0;
11169 break;
11170
11171 case SUBREG:
11172 /* Allow only subregs of DImode hard regs in PLUS chains. */
11173 if (!register_no_elim_operand (SUBREG_REG (op), DImode))
11174 return 0;
11175 /* FALLTHRU */
11176
11177 case REG:
11178 if (!base)
11179 base = op;
11180 else if (!index)
11181 index = op;
11182 else
11183 return 0;
11184 break;
11185
11186 case CONST:
11187 case CONST_INT:
11188 case SYMBOL_REF:
11189 case LABEL_REF:
11190 if (disp)
11191 return 0;
11192 disp = op;
11193 break;
11194
11195 default:
11196 return 0;
11197 }
11198 }
11199 }
11200 else if (GET_CODE (addr) == MULT)
11201 {
11202 index = XEXP (addr, 0); /* index*scale */
11203 scale_rtx = XEXP (addr, 1);
11204 }
11205 else if (GET_CODE (addr) == ASHIFT)
11206 {
11207 /* We're called for lea too, which implements ashift on occasion. */
11208 index = XEXP (addr, 0);
11209 tmp = XEXP (addr, 1);
11210 if (!CONST_INT_P (tmp))
11211 return 0;
11212 scale = INTVAL (tmp);
11213 if ((unsigned HOST_WIDE_INT) scale > 3)
11214 return 0;
11215 scale = 1 << scale;
11216 retval = -1;
11217 }
11218 else
11219 disp = addr; /* displacement */
11220
11221 if (index)
11222 {
11223 if (REG_P (index))
11224 ;
11225 /* Allow only subregs of DImode hard regs. */
11226 else if (GET_CODE (index) == SUBREG
11227 && register_no_elim_operand (SUBREG_REG (index), DImode))
11228 ;
11229 else
11230 return 0;
11231 }
11232
11233 /* Extract the integral value of scale. */
11234 if (scale_rtx)
11235 {
11236 if (!CONST_INT_P (scale_rtx))
11237 return 0;
11238 scale = INTVAL (scale_rtx);
11239 }
11240
11241 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11242 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11243
11244 /* Avoid useless 0 displacement. */
11245 if (disp == const0_rtx && (base || index))
11246 disp = NULL_RTX;
11247
11248 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11249 if (base_reg && index_reg && scale == 1
11250 && (index_reg == arg_pointer_rtx
11251 || index_reg == frame_pointer_rtx
11252 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11253 {
11254 rtx tmp;
11255 tmp = base, base = index, index = tmp;
11256 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11257 }
11258
11259 /* Special case: %ebp cannot be encoded as a base without a displacement.
11260 Similarly %r13. */
11261 if (!disp
11262 && base_reg
11263 && (base_reg == hard_frame_pointer_rtx
11264 || base_reg == frame_pointer_rtx
11265 || base_reg == arg_pointer_rtx
11266 || (REG_P (base_reg)
11267 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11268 || REGNO (base_reg) == R13_REG))))
11269 disp = const0_rtx;
11270
11271 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
11272 Avoid this by transforming to [%esi+0].
11273 Reload calls address legitimization without cfun defined, so we need
11274 to test cfun for being non-NULL. */
11275 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11276 && base_reg && !index_reg && !disp
11277 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11278 disp = const0_rtx;
11279
11280 /* Special case: encode reg+reg instead of reg*2. */
11281 if (!base && index && scale == 2)
11282 base = index, base_reg = index_reg, scale = 1;
11283
11284 /* Special case: scaling cannot be encoded without base or displacement. */
11285 if (!base && !disp && index && scale != 1)
11286 disp = const0_rtx;
11287
11288 out->base = base;
11289 out->index = index;
11290 out->disp = disp;
11291 out->scale = scale;
11292 out->seg = seg;
11293
11294 return retval;
11295 }
11296 \f
11297 /* Return cost of the memory address x.
11298 For i386, it is better to use a complex address than let gcc copy
11299 the address into a reg and make a new pseudo. But not if the address
11300 requires two regs - that would mean more pseudos with longer
11301 lifetimes. */
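/* For instance, ignoring the AMD-K6 penalty below, an address whose base
   register is still a pseudo costs 2, while the same address with a hard
   register base costs only 1.  */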
11302 static int
11303 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11304 {
11305 struct ix86_address parts;
11306 int cost = 1;
11307 int ok = ix86_decompose_address (x, &parts);
11308
11309 gcc_assert (ok);
11310
11311 if (parts.base && GET_CODE (parts.base) == SUBREG)
11312 parts.base = SUBREG_REG (parts.base);
11313 if (parts.index && GET_CODE (parts.index) == SUBREG)
11314 parts.index = SUBREG_REG (parts.index);
11315
11316 /* Attempt to minimize number of registers in the address. */
11317 if ((parts.base
11318 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11319 || (parts.index
11320 && (!REG_P (parts.index)
11321 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11322 cost++;
11323
11324 if (parts.base
11325 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11326 && parts.index
11327 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11328 && parts.base != parts.index)
11329 cost++;
11330
11331 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11332 since its predecode logic can't detect the length of instructions
11333 and decoding degenerates to vector decoding. Increase the cost of such
11334 addresses here. The penalty is at least 2 cycles. It may be worthwhile
11335 to split such addresses or even refuse such addresses at all.
11336 
11337 The following addressing modes are affected:
11338 [base+scale*index]
11339 [scale*index+disp]
11340 [base+index]
11341 
11342 The first and last case may be avoidable by explicitly coding the zero in
11343 the memory address, but I don't have an AMD-K6 machine handy to check this
11344 theory. */
11345
11346 if (TARGET_K6
11347 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11348 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11349 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11350 cost += 10;
11351
11352 return cost;
11353 }
11354 \f
11355 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11356 this is used to form addresses to local data when -fPIC is in
11357 use. */
11358
11359 static bool
11360 darwin_local_data_pic (rtx disp)
11361 {
11362 return (GET_CODE (disp) == UNSPEC
11363 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11364 }
11365
11366 /* Determine if a given RTX is a valid constant. We already know this
11367 satisfies CONSTANT_P. */
11368
11369 static bool
11370 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11371 {
11372 switch (GET_CODE (x))
11373 {
11374 case CONST:
11375 x = XEXP (x, 0);
11376
11377 if (GET_CODE (x) == PLUS)
11378 {
11379 if (!CONST_INT_P (XEXP (x, 1)))
11380 return false;
11381 x = XEXP (x, 0);
11382 }
11383
11384 if (TARGET_MACHO && darwin_local_data_pic (x))
11385 return true;
11386
11387 /* Only some unspecs are valid as "constants". */
11388 if (GET_CODE (x) == UNSPEC)
11389 switch (XINT (x, 1))
11390 {
11391 case UNSPEC_GOT:
11392 case UNSPEC_GOTOFF:
11393 case UNSPEC_PLTOFF:
11394 return TARGET_64BIT;
11395 case UNSPEC_TPOFF:
11396 case UNSPEC_NTPOFF:
11397 x = XVECEXP (x, 0, 0);
11398 return (GET_CODE (x) == SYMBOL_REF
11399 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11400 case UNSPEC_DTPOFF:
11401 x = XVECEXP (x, 0, 0);
11402 return (GET_CODE (x) == SYMBOL_REF
11403 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11404 default:
11405 return false;
11406 }
11407
11408 /* We must have drilled down to a symbol. */
11409 if (GET_CODE (x) == LABEL_REF)
11410 return true;
11411 if (GET_CODE (x) != SYMBOL_REF)
11412 return false;
11413 /* FALLTHRU */
11414
11415 case SYMBOL_REF:
11416 /* TLS symbols are never valid. */
11417 if (SYMBOL_REF_TLS_MODEL (x))
11418 return false;
11419
11420 /* DLLIMPORT symbols are never valid. */
11421 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11422 && SYMBOL_REF_DLLIMPORT_P (x))
11423 return false;
11424
11425 #if TARGET_MACHO
11426 /* mdynamic-no-pic */
11427 if (MACHO_DYNAMIC_NO_PIC_P)
11428 return machopic_symbol_defined_p (x);
11429 #endif
11430 break;
11431
11432 case CONST_DOUBLE:
11433 if (GET_MODE (x) == TImode
11434 && x != CONST0_RTX (TImode)
11435 && !TARGET_64BIT)
11436 return false;
11437 break;
11438
11439 case CONST_VECTOR:
11440 if (!standard_sse_constant_p (x))
11441 return false;
11442
11443 default:
11444 break;
11445 }
11446
11447 /* Otherwise we handle everything else in the move patterns. */
11448 return true;
11449 }
11450
11451 /* Determine if it's legal to put X into the constant pool. This
11452 is not possible for the address of thread-local symbols, which
11453 is checked above. */
11454
11455 static bool
11456 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11457 {
11458 /* We can always put integral constants and vectors in memory. */
11459 switch (GET_CODE (x))
11460 {
11461 case CONST_INT:
11462 case CONST_DOUBLE:
11463 case CONST_VECTOR:
11464 return false;
11465
11466 default:
11467 break;
11468 }
11469 return !ix86_legitimate_constant_p (mode, x);
11470 }
11471
11472
11473 /* Nonzero if the constant value X is a legitimate general operand
11474 when generating PIC code. It is given that flag_pic is on and
11475 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11476
11477 bool
11478 legitimate_pic_operand_p (rtx x)
11479 {
11480 rtx inner;
11481
11482 switch (GET_CODE (x))
11483 {
11484 case CONST:
11485 inner = XEXP (x, 0);
11486 if (GET_CODE (inner) == PLUS
11487 && CONST_INT_P (XEXP (inner, 1)))
11488 inner = XEXP (inner, 0);
11489
11490 /* Only some unspecs are valid as "constants". */
11491 if (GET_CODE (inner) == UNSPEC)
11492 switch (XINT (inner, 1))
11493 {
11494 case UNSPEC_GOT:
11495 case UNSPEC_GOTOFF:
11496 case UNSPEC_PLTOFF:
11497 return TARGET_64BIT;
11498 case UNSPEC_TPOFF:
11499 x = XVECEXP (inner, 0, 0);
11500 return (GET_CODE (x) == SYMBOL_REF
11501 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11502 case UNSPEC_MACHOPIC_OFFSET:
11503 return legitimate_pic_address_disp_p (x);
11504 default:
11505 return false;
11506 }
11507 /* FALLTHRU */
11508
11509 case SYMBOL_REF:
11510 case LABEL_REF:
11511 return legitimate_pic_address_disp_p (x);
11512
11513 default:
11514 return true;
11515 }
11516 }
11517
11518 /* Determine if a given CONST RTX is a valid memory displacement
11519 in PIC mode. */
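/* For instance, in 32-bit PIC code (const (unspec [foo] UNSPEC_GOTOFF))
   is typically a valid displacement for a local symbol, while a bare
   SYMBOL_REF is not.  */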
11520
11521 bool
11522 legitimate_pic_address_disp_p (rtx disp)
11523 {
11524 bool saw_plus;
11525
11526 /* In 64bit mode we can allow direct addresses of symbols and labels
11527 when they are not dynamic symbols. */
11528 if (TARGET_64BIT)
11529 {
11530 rtx op0 = disp, op1;
11531
11532 switch (GET_CODE (disp))
11533 {
11534 case LABEL_REF:
11535 return true;
11536
11537 case CONST:
11538 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11539 break;
11540 op0 = XEXP (XEXP (disp, 0), 0);
11541 op1 = XEXP (XEXP (disp, 0), 1);
11542 if (!CONST_INT_P (op1)
11543 || INTVAL (op1) >= 16*1024*1024
11544 || INTVAL (op1) < -16*1024*1024)
11545 break;
11546 if (GET_CODE (op0) == LABEL_REF)
11547 return true;
11548 if (GET_CODE (op0) != SYMBOL_REF)
11549 break;
11550 /* FALLTHRU */
11551
11552 case SYMBOL_REF:
11553 /* TLS references should always be enclosed in UNSPEC. */
11554 if (SYMBOL_REF_TLS_MODEL (op0))
11555 return false;
11556 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11557 && ix86_cmodel != CM_LARGE_PIC)
11558 return true;
11559 break;
11560
11561 default:
11562 break;
11563 }
11564 }
11565 if (GET_CODE (disp) != CONST)
11566 return false;
11567 disp = XEXP (disp, 0);
11568
11569 if (TARGET_64BIT)
11570 {
11571 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
11572 of GOT tables. We should not need these anyway. */
11573 if (GET_CODE (disp) != UNSPEC
11574 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11575 && XINT (disp, 1) != UNSPEC_GOTOFF
11576 && XINT (disp, 1) != UNSPEC_PCREL
11577 && XINT (disp, 1) != UNSPEC_PLTOFF))
11578 return false;
11579
11580 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11581 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11582 return false;
11583 return true;
11584 }
11585
11586 saw_plus = false;
11587 if (GET_CODE (disp) == PLUS)
11588 {
11589 if (!CONST_INT_P (XEXP (disp, 1)))
11590 return false;
11591 disp = XEXP (disp, 0);
11592 saw_plus = true;
11593 }
11594
11595 if (TARGET_MACHO && darwin_local_data_pic (disp))
11596 return true;
11597
11598 if (GET_CODE (disp) != UNSPEC)
11599 return false;
11600
11601 switch (XINT (disp, 1))
11602 {
11603 case UNSPEC_GOT:
11604 if (saw_plus)
11605 return false;
11606 /* We need to check for both symbols and labels because VxWorks loads
11607 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11608 details. */
11609 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11610 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11611 case UNSPEC_GOTOFF:
11612 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11613 While the ABI also specifies a 32bit relocation, we don't produce it in
11614 the small PIC model at all. */
11615 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11616 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11617 && !TARGET_64BIT)
11618 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11619 return false;
11620 case UNSPEC_GOTTPOFF:
11621 case UNSPEC_GOTNTPOFF:
11622 case UNSPEC_INDNTPOFF:
11623 if (saw_plus)
11624 return false;
11625 disp = XVECEXP (disp, 0, 0);
11626 return (GET_CODE (disp) == SYMBOL_REF
11627 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11628 case UNSPEC_NTPOFF:
11629 disp = XVECEXP (disp, 0, 0);
11630 return (GET_CODE (disp) == SYMBOL_REF
11631 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11632 case UNSPEC_DTPOFF:
11633 disp = XVECEXP (disp, 0, 0);
11634 return (GET_CODE (disp) == SYMBOL_REF
11635 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11636 }
11637
11638 return false;
11639 }
11640
11641 /* Recognizes RTL expressions that are valid memory addresses for an
11642 instruction. The MODE argument is the machine mode for the MEM
11643 expression that wants to use this address.
11644
11645 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11646 convert common non-canonical forms to canonical form so that they will
11647 be recognized. */
11648
11649 static bool
11650 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11651 rtx addr, bool strict)
11652 {
11653 struct ix86_address parts;
11654 rtx base, index, disp;
11655 HOST_WIDE_INT scale;
11656
11657 if (ix86_decompose_address (addr, &parts) <= 0)
11658 /* Decomposition failed. */
11659 return false;
11660
11661 base = parts.base;
11662 index = parts.index;
11663 disp = parts.disp;
11664 scale = parts.scale;
11665
11666 /* Validate base register. */
11667 if (base)
11668 {
11669 rtx reg;
11670
11671 if (REG_P (base))
11672 reg = base;
11673 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
11674 {
11675 reg = SUBREG_REG (base);
11676 gcc_assert (register_no_elim_operand (reg, DImode));
11677 }
11678 else
11679 /* Base is not a register. */
11680 return false;
11681
11682 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
11683 return false;
11684
11685 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11686 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11687 /* Base is not valid. */
11688 return false;
11689 }
11690
11691 /* Validate index register. */
11692 if (index)
11693 {
11694 rtx reg;
11695
11696 if (REG_P (index))
11697 reg = index;
11698 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
11699 {
11700 reg = SUBREG_REG (index);
11701 gcc_assert (register_no_elim_operand (reg, DImode));
11702 }
11703 else
11704 /* Index is not a register. */
11705 return false;
11706
11707 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
11708 return false;
11709
11710 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11711 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11712 /* Index is not valid. */
11713 return false;
11714 }
11715
11716 /* Index and base should have the same mode. */
11717 if (base && index
11718 && GET_MODE (base) != GET_MODE (index))
11719 return false;
11720
11721 /* Validate scale factor. */
11722 if (scale != 1)
11723 {
11724 if (!index)
11725 /* Scale without index. */
11726 return false;
11727
11728 if (scale != 2 && scale != 4 && scale != 8)
11729 /* Scale is not a valid multiplier. */
11730 return false;
11731 }
11732
11733 /* Validate displacement. */
11734 if (disp)
11735 {
11736 if (GET_CODE (disp) == CONST
11737 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11738 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11739 switch (XINT (XEXP (disp, 0), 1))
11740 {
11741 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11742 used. While the ABI also specifies 32bit relocations, we don't produce
11743 them at all and use IP-relative addressing instead. */
11744 case UNSPEC_GOT:
11745 case UNSPEC_GOTOFF:
11746 gcc_assert (flag_pic);
11747 if (!TARGET_64BIT)
11748 goto is_legitimate_pic;
11749
11750 /* 64bit address unspec. */
11751 return false;
11752
11753 case UNSPEC_GOTPCREL:
11754 case UNSPEC_PCREL:
11755 gcc_assert (flag_pic);
11756 goto is_legitimate_pic;
11757
11758 case UNSPEC_GOTTPOFF:
11759 case UNSPEC_GOTNTPOFF:
11760 case UNSPEC_INDNTPOFF:
11761 case UNSPEC_NTPOFF:
11762 case UNSPEC_DTPOFF:
11763 break;
11764
11765 case UNSPEC_STACK_CHECK:
11766 gcc_assert (flag_split_stack);
11767 break;
11768
11769 default:
11770 /* Invalid address unspec. */
11771 return false;
11772 }
11773
11774 else if (SYMBOLIC_CONST (disp)
11775 && (flag_pic
11776 || (TARGET_MACHO
11777 #if TARGET_MACHO
11778 && MACHOPIC_INDIRECT
11779 && !machopic_operand_p (disp)
11780 #endif
11781 )))
11782 {
11783
11784 is_legitimate_pic:
11785 if (TARGET_64BIT && (index || base))
11786 {
11787 /* foo@dtpoff(%rX) is ok. */
11788 if (GET_CODE (disp) != CONST
11789 || GET_CODE (XEXP (disp, 0)) != PLUS
11790 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11791 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11792 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11793 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11794 /* Non-constant pic memory reference. */
11795 return false;
11796 }
11797 else if ((!TARGET_MACHO || flag_pic)
11798 && ! legitimate_pic_address_disp_p (disp))
11799 /* Displacement is an invalid pic construct. */
11800 return false;
11801 #if TARGET_MACHO
11802 else if (MACHO_DYNAMIC_NO_PIC_P
11803 && !ix86_legitimate_constant_p (Pmode, disp))
11804 /* displacement must be referenced via non_lazy_pointer */
11805 return false;
11806 #endif
11807
11808 /* This code used to verify that a symbolic pic displacement
11809 includes the pic_offset_table_rtx register.
11810
11811 While this is a good idea, unfortunately these constructs may
11812 be created by the "adds using lea" optimization for incorrect
11813 code like:
11814
11815 int a;
11816 int foo(int i)
11817 {
11818 return *(&a+i);
11819 }
11820
11821 This code is nonsensical, but results in addressing the
11822 GOT table with a pic_offset_table_rtx base. We can't
11823 just refuse it easily, since it gets matched by the
11824 "addsi3" pattern, which later gets split to lea in case
11825 the output register differs from the input. While this
11826 could be handled by a separate addsi pattern for this case
11827 that never results in lea, disabling this test seems to be
11828 the easier and correct fix for the crash. */
11829 }
11830 else if (GET_CODE (disp) != LABEL_REF
11831 && !CONST_INT_P (disp)
11832 && (GET_CODE (disp) != CONST
11833 || !ix86_legitimate_constant_p (Pmode, disp))
11834 && (GET_CODE (disp) != SYMBOL_REF
11835 || !ix86_legitimate_constant_p (Pmode, disp)))
11836 /* Displacement is not constant. */
11837 return false;
11838 else if (TARGET_64BIT
11839 && !x86_64_immediate_operand (disp, VOIDmode))
11840 /* Displacement is out of range. */
11841 return false;
11842 }
11843
11844 /* Everything looks valid. */
11845 return true;
11846 }
11847
11848 /* Determine if a given RTX is a valid constant address. */
11849
11850 bool
11851 constant_address_p (rtx x)
11852 {
11853 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11854 }
11855 \f
11856 /* Return a unique alias set for the GOT. */
11857
11858 static alias_set_type
11859 ix86_GOT_alias_set (void)
11860 {
11861 static alias_set_type set = -1;
11862 if (set == -1)
11863 set = new_alias_set ();
11864 return set;
11865 }
11866
11867 /* Return a legitimate reference for ORIG (an address) using the
11868 register REG. If REG is 0, a new pseudo is generated.
11869
11870 There are two types of references that must be handled:
11871
11872 1. Global data references must load the address from the GOT, via
11873 the PIC reg. An insn is emitted to do this load, and the reg is
11874 returned.
11875
11876 2. Static data references, constant pool addresses, and code labels
11877 compute the address as an offset from the GOT, whose base is in
11878 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11879 differentiate them from global data objects. The returned
11880 address is the PIC reg + an unspec constant.
11881
11882 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11883 reg also appears in the address. */
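/* For instance, in 32-bit PIC code a global symbol FOO becomes
   (mem (plus (reg pic) (const (unspec [FOO] UNSPEC_GOT)))), while a
   local symbol becomes
   (plus (reg pic) (const (unspec [FOO] UNSPEC_GOTOFF))).  */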
11884
11885 static rtx
11886 legitimize_pic_address (rtx orig, rtx reg)
11887 {
11888 rtx addr = orig;
11889 rtx new_rtx = orig;
11890 rtx base;
11891
11892 #if TARGET_MACHO
11893 if (TARGET_MACHO && !TARGET_64BIT)
11894 {
11895 if (reg == 0)
11896 reg = gen_reg_rtx (Pmode);
11897 /* Use the generic Mach-O PIC machinery. */
11898 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11899 }
11900 #endif
11901
11902 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11903 new_rtx = addr;
11904 else if (TARGET_64BIT
11905 && ix86_cmodel != CM_SMALL_PIC
11906 && gotoff_operand (addr, Pmode))
11907 {
11908 rtx tmpreg;
11909 /* This symbol may be referenced via a displacement from the PIC
11910 base address (@GOTOFF). */
11911
11912 if (reload_in_progress)
11913 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11914 if (GET_CODE (addr) == CONST)
11915 addr = XEXP (addr, 0);
11916 if (GET_CODE (addr) == PLUS)
11917 {
11918 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11919 UNSPEC_GOTOFF);
11920 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11921 }
11922 else
11923 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11924 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11925 if (!reg)
11926 tmpreg = gen_reg_rtx (Pmode);
11927 else
11928 tmpreg = reg;
11929 emit_move_insn (tmpreg, new_rtx);
11930
11931 if (reg != 0)
11932 {
11933 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11934 tmpreg, 1, OPTAB_DIRECT);
11935 new_rtx = reg;
11936 }
11937 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11938 }
11939 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11940 {
11941 /* This symbol may be referenced via a displacement from the PIC
11942 base address (@GOTOFF). */
11943
11944 if (reload_in_progress)
11945 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11946 if (GET_CODE (addr) == CONST)
11947 addr = XEXP (addr, 0);
11948 if (GET_CODE (addr) == PLUS)
11949 {
11950 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11951 UNSPEC_GOTOFF);
11952 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11953 }
11954 else
11955 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11956 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11957 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11958
11959 if (reg != 0)
11960 {
11961 emit_move_insn (reg, new_rtx);
11962 new_rtx = reg;
11963 }
11964 }
11965 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11966 /* We can't use @GOTOFF for text labels on VxWorks;
11967 see gotoff_operand. */
11968 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11969 {
11970 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11971 {
11972 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11973 return legitimize_dllimport_symbol (addr, true);
11974 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11975 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11976 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11977 {
11978 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11979 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11980 }
11981 }
11982
11983 /* For x64 PE-COFF there is no GOT table. So we use the address
11984 directly. */
11985 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11986 {
11987 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
11988 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11989
11990 if (reg == 0)
11991 reg = gen_reg_rtx (Pmode);
11992 emit_move_insn (reg, new_rtx);
11993 new_rtx = reg;
11994 }
11995 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11996 {
11997 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11998 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11999 new_rtx = gen_const_mem (Pmode, new_rtx);
12000 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12001
12002 if (reg == 0)
12003 reg = gen_reg_rtx (Pmode);
12004 /* Use gen_movsi directly, otherwise the address is loaded
12005 into a register for CSE. We don't want to CSE this address;
12006 instead we CSE addresses from the GOT table, so skip this. */
12007 emit_insn (gen_movsi (reg, new_rtx));
12008 new_rtx = reg;
12009 }
12010 else
12011 {
12012 /* This symbol must be referenced via a load from the
12013 Global Offset Table (@GOT). */
12014
12015 if (reload_in_progress)
12016 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12017 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12018 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12019 if (TARGET_64BIT)
12020 new_rtx = force_reg (Pmode, new_rtx);
12021 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12022 new_rtx = gen_const_mem (Pmode, new_rtx);
12023 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12024
12025 if (reg == 0)
12026 reg = gen_reg_rtx (Pmode);
12027 emit_move_insn (reg, new_rtx);
12028 new_rtx = reg;
12029 }
12030 }
12031 else
12032 {
12033 if (CONST_INT_P (addr)
12034 && !x86_64_immediate_operand (addr, VOIDmode))
12035 {
12036 if (reg)
12037 {
12038 emit_move_insn (reg, addr);
12039 new_rtx = reg;
12040 }
12041 else
12042 new_rtx = force_reg (Pmode, addr);
12043 }
12044 else if (GET_CODE (addr) == CONST)
12045 {
12046 addr = XEXP (addr, 0);
12047
12048 /* We must match stuff we generate before. Assume the only
12049 unspecs that can get here are ours. Not that we could do
12050 anything with them anyway.... */
12051 if (GET_CODE (addr) == UNSPEC
12052 || (GET_CODE (addr) == PLUS
12053 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12054 return orig;
12055 gcc_assert (GET_CODE (addr) == PLUS);
12056 }
12057 if (GET_CODE (addr) == PLUS)
12058 {
12059 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12060
12061 /* Check first to see if this is a constant offset from a @GOTOFF
12062 symbol reference. */
12063 if (gotoff_operand (op0, Pmode)
12064 && CONST_INT_P (op1))
12065 {
12066 if (!TARGET_64BIT)
12067 {
12068 if (reload_in_progress)
12069 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12070 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12071 UNSPEC_GOTOFF);
12072 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12073 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12074 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12075
12076 if (reg != 0)
12077 {
12078 emit_move_insn (reg, new_rtx);
12079 new_rtx = reg;
12080 }
12081 }
12082 else
12083 {
12084 if (INTVAL (op1) < -16*1024*1024
12085 || INTVAL (op1) >= 16*1024*1024)
12086 {
12087 if (!x86_64_immediate_operand (op1, Pmode))
12088 op1 = force_reg (Pmode, op1);
12089 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12090 }
12091 }
12092 }
12093 else
12094 {
12095 base = legitimize_pic_address (XEXP (addr, 0), reg);
12096 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12097 base == reg ? NULL_RTX : reg);
12098
12099 if (CONST_INT_P (new_rtx))
12100 new_rtx = plus_constant (base, INTVAL (new_rtx));
12101 else
12102 {
12103 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12104 {
12105 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12106 new_rtx = XEXP (new_rtx, 1);
12107 }
12108 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12109 }
12110 }
12111 }
12112 }
12113 return new_rtx;
12114 }
12115 \f
12116 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12117
12118 static rtx
12119 get_thread_pointer (bool to_reg)
12120 {
12121 rtx tp, reg, insn;
12122
12123 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12124 if (!to_reg)
12125 return tp;
12126
12127 reg = gen_reg_rtx (Pmode);
12128 insn = gen_rtx_SET (VOIDmode, reg, tp);
12129 insn = emit_insn (insn);
12130
12131 return reg;
12132 }
12133
12134 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12135
12136 static GTY(()) rtx ix86_tls_symbol;
12137
12138 static rtx
12139 ix86_tls_get_addr (void)
12140 {
12141 if (!ix86_tls_symbol)
12142 {
12143 const char *sym
12144 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12145 ? "___tls_get_addr" : "__tls_get_addr");
12146
12147 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12148 }
12149
12150 return ix86_tls_symbol;
12151 }
12152
12153 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12154
12155 static GTY(()) rtx ix86_tls_module_base_symbol;
12156
12157 rtx
12158 ix86_tls_module_base (void)
12159 {
12160 if (!ix86_tls_module_base_symbol)
12161 {
12162 ix86_tls_module_base_symbol
12163 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12164
12165 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12166 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12167 }
12168
12169 return ix86_tls_module_base_symbol;
12170 }
12171
12172 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12173 false if we expect this to be used for a memory address and true if
12174 we expect to load the address into a register. */
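/* For the global and local dynamic models this emits a call to
   __tls_get_addr (___tls_get_addr for 32-bit GNU TLS) or, with
   TARGET_GNU2_TLS, the descriptor-based sequence; for the initial exec
   model it loads the symbol's offset, via the GOT when needed, and
   combines it with the thread pointer.  */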
12175
12176 static rtx
12177 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12178 {
12179 rtx dest, base, off;
12180 rtx pic = NULL_RTX, tp = NULL_RTX;
12181 int type;
12182
12183 switch (model)
12184 {
12185 case TLS_MODEL_GLOBAL_DYNAMIC:
12186 dest = gen_reg_rtx (Pmode);
12187
12188 if (!TARGET_64BIT)
12189 {
12190 if (flag_pic)
12191 pic = pic_offset_table_rtx;
12192 else
12193 {
12194 pic = gen_reg_rtx (Pmode);
12195 emit_insn (gen_set_got (pic));
12196 }
12197 }
12198
12199 if (TARGET_GNU2_TLS)
12200 {
12201 if (TARGET_64BIT)
12202 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12203 else
12204 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12205
12206 tp = get_thread_pointer (true);
12207 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12208
12209 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12210 }
12211 else
12212 {
12213 rtx caddr = ix86_tls_get_addr ();
12214
12215 if (TARGET_64BIT)
12216 {
12217 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12218
12219 start_sequence ();
12220 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12221 insns = get_insns ();
12222 end_sequence ();
12223
12224 RTL_CONST_CALL_P (insns) = 1;
12225 emit_libcall_block (insns, dest, rax, x);
12226 }
12227 else
12228 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12229 }
12230 break;
12231
12232 case TLS_MODEL_LOCAL_DYNAMIC:
12233 base = gen_reg_rtx (Pmode);
12234
12235 if (!TARGET_64BIT)
12236 {
12237 if (flag_pic)
12238 pic = pic_offset_table_rtx;
12239 else
12240 {
12241 pic = gen_reg_rtx (Pmode);
12242 emit_insn (gen_set_got (pic));
12243 }
12244 }
12245
12246 if (TARGET_GNU2_TLS)
12247 {
12248 rtx tmp = ix86_tls_module_base ();
12249
12250 if (TARGET_64BIT)
12251 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12252 else
12253 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12254
12255 tp = get_thread_pointer (true);
12256 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12257 gen_rtx_MINUS (Pmode, tmp, tp));
12258 }
12259 else
12260 {
12261 rtx caddr = ix86_tls_get_addr ();
12262
12263 if (TARGET_64BIT)
12264 {
12265 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12266
12267 start_sequence ();
12268 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12269 insns = get_insns ();
12270 end_sequence ();
12271
12272 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
12273 share the LD_BASE result with other LD model accesses. */
12274 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12275 UNSPEC_TLS_LD_BASE);
12276
12277 RTL_CONST_CALL_P (insns) = 1;
12278 emit_libcall_block (insns, base, rax, eqv);
12279 }
12280 else
12281 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12282 }
12283
12284 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12285 off = gen_rtx_CONST (Pmode, off);
12286
12287 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12288
12289 if (TARGET_GNU2_TLS)
12290 {
12291 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12292
12293 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12294 }
12295 break;
12296
12297 case TLS_MODEL_INITIAL_EXEC:
12298 if (TARGET_64BIT)
12299 {
12300 if (TARGET_SUN_TLS)
12301 {
12302 /* The Sun linker took the AMD64 TLS spec literally
12303 and can only handle %rax as the destination of the
12304 initial-exec code sequence. */
12305
12306 dest = gen_reg_rtx (Pmode);
12307 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12308 return dest;
12309 }
12310
12311 pic = NULL;
12312 type = UNSPEC_GOTNTPOFF;
12313 }
12314 else if (flag_pic)
12315 {
12316 if (reload_in_progress)
12317 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12318 pic = pic_offset_table_rtx;
12319 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12320 }
12321 else if (!TARGET_ANY_GNU_TLS)
12322 {
12323 pic = gen_reg_rtx (Pmode);
12324 emit_insn (gen_set_got (pic));
12325 type = UNSPEC_GOTTPOFF;
12326 }
12327 else
12328 {
12329 pic = NULL;
12330 type = UNSPEC_INDNTPOFF;
12331 }
12332
12333 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12334 off = gen_rtx_CONST (Pmode, off);
12335 if (pic)
12336 off = gen_rtx_PLUS (Pmode, pic, off);
12337 off = gen_const_mem (Pmode, off);
12338 set_mem_alias_set (off, ix86_GOT_alias_set ());
12339
12340 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12341 {
12342 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12343 off = force_reg (Pmode, off);
12344 return gen_rtx_PLUS (Pmode, base, off);
12345 }
12346 else
12347 {
12348 base = get_thread_pointer (true);
12349 dest = gen_reg_rtx (Pmode);
12350 emit_insn (gen_subsi3 (dest, base, off));
12351 }
12352 break;
12353
12354 case TLS_MODEL_LOCAL_EXEC:
12355 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12356 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12357 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12358 off = gen_rtx_CONST (Pmode, off);
12359
12360 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12361 {
12362 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12363 return gen_rtx_PLUS (Pmode, base, off);
12364 }
12365 else
12366 {
12367 base = get_thread_pointer (true);
12368 dest = gen_reg_rtx (Pmode);
12369 emit_insn (gen_subsi3 (dest, base, off));
12370 }
12371 break;
12372
12373 default:
12374 gcc_unreachable ();
12375 }
12376
12377 return dest;
12378 }
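/* Roughly, the sequences produced by the cases above look like this on a
   typical GNU/Linux target (details vary with -mtls-dialect and the ABI,
   and the authoritative templates live in i386.md):

     global dynamic:  leal x@tlsgd(,%ebx,1), %eax; call ___tls_get_addr
                      (64-bit: leaq x@tlsgd(%rip), %rdi; call __tls_get_addr@PLT)
     local dynamic:   one call computing the module base, after which
                      per-symbol x@dtpoff offsets are added to it
     initial exec:    load the x@gottpoff GOT entry and add the thread pointer
     local exec:      thread pointer plus a link-time x@ntpoff/x@tpoff constant  */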
12379
12380 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12381 to symbol DECL. */
12382
12383 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12384 htab_t dllimport_map;
12385
12386 static tree
12387 get_dllimport_decl (tree decl)
12388 {
12389 struct tree_map *h, in;
12390 void **loc;
12391 const char *name;
12392 const char *prefix;
12393 size_t namelen, prefixlen;
12394 char *imp_name;
12395 tree to;
12396 rtx rtl;
12397
12398 if (!dllimport_map)
12399 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12400
12401 in.hash = htab_hash_pointer (decl);
12402 in.base.from = decl;
12403 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12404 h = (struct tree_map *) *loc;
12405 if (h)
12406 return h->to;
12407
12408 *loc = h = ggc_alloc_tree_map ();
12409 h->hash = in.hash;
12410 h->base.from = decl;
12411 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12412 VAR_DECL, NULL, ptr_type_node);
12413 DECL_ARTIFICIAL (to) = 1;
12414 DECL_IGNORED_P (to) = 1;
12415 DECL_EXTERNAL (to) = 1;
12416 TREE_READONLY (to) = 1;
12417
12418 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12419 name = targetm.strip_name_encoding (name);
12420 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12421 ? "*__imp_" : "*__imp__";
12422 namelen = strlen (name);
12423 prefixlen = strlen (prefix);
12424 imp_name = (char *) alloca (namelen + prefixlen + 1);
12425 memcpy (imp_name, prefix, prefixlen);
12426 memcpy (imp_name + prefixlen, name, namelen + 1);
12427
12428 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12429 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12430 SET_SYMBOL_REF_DECL (rtl, to);
12431 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12432
12433 rtl = gen_const_mem (Pmode, rtl);
12434 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12435
12436 SET_DECL_RTL (to, rtl);
12437 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12438
12439 return to;
12440 }
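/* Example of the effect (MinGW/Cygwin-style targets): for a declaration
   such as "__declspec(dllimport) int foo;", the import library provides a
   pointer slot named __imp__foo (or __imp_foo when there is no user-label
   underscore).  The VAR_DECL built above has a MEM of that symbol as its
   DECL_RTL, so every use of foo becomes a load through the import slot.  */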
12441
12442 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12443 true if we require the result be a register. */
12444
12445 static rtx
12446 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12447 {
12448 tree imp_decl;
12449 rtx x;
12450
12451 gcc_assert (SYMBOL_REF_DECL (symbol));
12452 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12453
12454 x = DECL_RTL (imp_decl);
12455 if (want_reg)
12456 x = force_reg (Pmode, x);
12457 return x;
12458 }
12459
12460 /* Try machine-dependent ways of modifying an illegitimate address
12461 to be legitimate. If we find one, return the new, valid address.
12462 This macro is used in only one place: `memory_address' in explow.c.
12463
12464 OLDX is the address as it was before break_out_memory_refs was called.
12465 In some cases it is useful to look at this to decide what needs to be done.
12466
12467 It is always safe for this macro to do nothing. It exists to recognize
12468 opportunities to optimize the output.
12469
12470 For the 80386, we handle X+REG by loading X into a register R and
12471 using R+REG. R will go in a general reg and indexing will be used.
12472 However, if REG is a broken-out memory address or multiplication,
12473 nothing needs to be done because REG can certainly go in a general reg.
12474
12475 When -fpic is used, special handling is needed for symbolic references.
12476 See comments by legitimize_pic_address in i386.c for details. */
12477
12478 static rtx
12479 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12480 enum machine_mode mode)
12481 {
12482 int changed = 0;
12483 unsigned log;
12484
12485 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12486 if (log)
12487 return legitimize_tls_address (x, (enum tls_model) log, false);
12488 if (GET_CODE (x) == CONST
12489 && GET_CODE (XEXP (x, 0)) == PLUS
12490 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12491 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12492 {
12493 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12494 (enum tls_model) log, false);
12495 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12496 }
12497
12498 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12499 {
12500 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12501 return legitimize_dllimport_symbol (x, true);
12502 if (GET_CODE (x) == CONST
12503 && GET_CODE (XEXP (x, 0)) == PLUS
12504 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12505 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12506 {
12507 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12508 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12509 }
12510 }
12511
12512 if (flag_pic && SYMBOLIC_CONST (x))
12513 return legitimize_pic_address (x, 0);
12514
12515 #if TARGET_MACHO
12516 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12517 return machopic_indirect_data_reference (x, 0);
12518 #endif
12519
12520 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12521 if (GET_CODE (x) == ASHIFT
12522 && CONST_INT_P (XEXP (x, 1))
12523 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12524 {
12525 changed = 1;
12526 log = INTVAL (XEXP (x, 1));
12527 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12528 GEN_INT (1 << log));
12529 }
12530
12531 if (GET_CODE (x) == PLUS)
12532 {
12533 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12534
12535 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12536 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12537 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12538 {
12539 changed = 1;
12540 log = INTVAL (XEXP (XEXP (x, 0), 1));
12541 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12542 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12543 GEN_INT (1 << log));
12544 }
12545
12546 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12547 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12548 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12549 {
12550 changed = 1;
12551 log = INTVAL (XEXP (XEXP (x, 1), 1));
12552 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12553 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12554 GEN_INT (1 << log));
12555 }
12556
12557 /* Put multiply first if it isn't already. */
12558 if (GET_CODE (XEXP (x, 1)) == MULT)
12559 {
12560 rtx tmp = XEXP (x, 0);
12561 XEXP (x, 0) = XEXP (x, 1);
12562 XEXP (x, 1) = tmp;
12563 changed = 1;
12564 }
12565
12566 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12567 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12568 created by virtual register instantiation, register elimination, and
12569 similar optimizations. */
12570 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12571 {
12572 changed = 1;
12573 x = gen_rtx_PLUS (Pmode,
12574 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12575 XEXP (XEXP (x, 1), 0)),
12576 XEXP (XEXP (x, 1), 1));
12577 }
12578
12579 /* Canonicalize
12580 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12581 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12582 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12583 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12584 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12585 && CONSTANT_P (XEXP (x, 1)))
12586 {
12587 rtx constant;
12588 rtx other = NULL_RTX;
12589
12590 if (CONST_INT_P (XEXP (x, 1)))
12591 {
12592 constant = XEXP (x, 1);
12593 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12594 }
12595 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12596 {
12597 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12598 other = XEXP (x, 1);
12599 }
12600 else
12601 constant = 0;
12602
12603 if (constant)
12604 {
12605 changed = 1;
12606 x = gen_rtx_PLUS (Pmode,
12607 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12608 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12609 plus_constant (other, INTVAL (constant)));
12610 }
12611 }
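/* Concretely (a sketch of the two canonicalizations above, with IDX and
   BASE as placeholder registers): an address such as
   (plus (mult (reg IDX) (const_int 4)) (plus (reg BASE) (const_int 16)))
   is rewritten into
   (plus (plus (mult (reg IDX) (const_int 4)) (reg BASE)) (const_int 16)),
   which matches the base + index*scale + disp shape that
   ix86_decompose_address accepts.  */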
12612
12613 if (changed && ix86_legitimate_address_p (mode, x, false))
12614 return x;
12615
12616 if (GET_CODE (XEXP (x, 0)) == MULT)
12617 {
12618 changed = 1;
12619 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12620 }
12621
12622 if (GET_CODE (XEXP (x, 1)) == MULT)
12623 {
12624 changed = 1;
12625 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12626 }
12627
12628 if (changed
12629 && REG_P (XEXP (x, 1))
12630 && REG_P (XEXP (x, 0)))
12631 return x;
12632
12633 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12634 {
12635 changed = 1;
12636 x = legitimize_pic_address (x, 0);
12637 }
12638
12639 if (changed && ix86_legitimate_address_p (mode, x, false))
12640 return x;
12641
12642 if (REG_P (XEXP (x, 0)))
12643 {
12644 rtx temp = gen_reg_rtx (Pmode);
12645 rtx val = force_operand (XEXP (x, 1), temp);
12646 if (val != temp)
12647 {
12648 if (GET_MODE (val) != Pmode)
12649 val = convert_to_mode (Pmode, val, 1);
12650 emit_move_insn (temp, val);
12651 }
12652
12653 XEXP (x, 1) = temp;
12654 return x;
12655 }
12656
12657 else if (REG_P (XEXP (x, 1)))
12658 {
12659 rtx temp = gen_reg_rtx (Pmode);
12660 rtx val = force_operand (XEXP (x, 0), temp);
12661 if (val != temp)
12662 {
12663 if (GET_MODE (val) != Pmode)
12664 val = convert_to_mode (Pmode, val, 1);
12665 emit_move_insn (temp, val);
12666 }
12667
12668 XEXP (x, 0) = temp;
12669 return x;
12670 }
12671 }
12672
12673 return x;
12674 }
12675 \f
12676 /* Print an integer constant expression in assembler syntax. Addition
12677 and subtraction are the only arithmetic that may appear in these
12678 expressions. FILE is the stdio stream to write to, X is the rtx, and
12679 CODE is the operand print code from the output string. */
12680
12681 static void
12682 output_pic_addr_const (FILE *file, rtx x, int code)
12683 {
12684 char buf[256];
12685
12686 switch (GET_CODE (x))
12687 {
12688 case PC:
12689 gcc_assert (flag_pic);
12690 putc ('.', file);
12691 break;
12692
12693 case SYMBOL_REF:
12694 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12695 output_addr_const (file, x);
12696 else
12697 {
12698 const char *name = XSTR (x, 0);
12699
12700 /* Mark the decl as referenced so that cgraph will
12701 output the function. */
12702 if (SYMBOL_REF_DECL (x))
12703 mark_decl_referenced (SYMBOL_REF_DECL (x));
12704
12705 #if TARGET_MACHO
12706 if (MACHOPIC_INDIRECT
12707 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12708 name = machopic_indirection_name (x, /*stub_p=*/true);
12709 #endif
12710 assemble_name (file, name);
12711 }
12712 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12713 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12714 fputs ("@PLT", file);
12715 break;
12716
12717 case LABEL_REF:
12718 x = XEXP (x, 0);
12719 /* FALLTHRU */
12720 case CODE_LABEL:
12721 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12722 assemble_name (asm_out_file, buf);
12723 break;
12724
12725 case CONST_INT:
12726 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12727 break;
12728
12729 case CONST:
12730 /* This used to output parentheses around the expression,
12731 but that does not work on the 386 (either ATT or BSD assembler). */
12732 output_pic_addr_const (file, XEXP (x, 0), code);
12733 break;
12734
12735 case CONST_DOUBLE:
12736 if (GET_MODE (x) == VOIDmode)
12737 {
12738 /* We can use %d if the number is <32 bits and positive. */
12739 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12740 fprintf (file, "0x%lx%08lx",
12741 (unsigned long) CONST_DOUBLE_HIGH (x),
12742 (unsigned long) CONST_DOUBLE_LOW (x));
12743 else
12744 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12745 }
12746 else
12747 /* We can't handle floating point constants;
12748 TARGET_PRINT_OPERAND must handle them. */
12749 output_operand_lossage ("floating constant misused");
12750 break;
12751
12752 case PLUS:
12753 /* Some assemblers need integer constants to appear first. */
12754 if (CONST_INT_P (XEXP (x, 0)))
12755 {
12756 output_pic_addr_const (file, XEXP (x, 0), code);
12757 putc ('+', file);
12758 output_pic_addr_const (file, XEXP (x, 1), code);
12759 }
12760 else
12761 {
12762 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12763 output_pic_addr_const (file, XEXP (x, 1), code);
12764 putc ('+', file);
12765 output_pic_addr_const (file, XEXP (x, 0), code);
12766 }
12767 break;
12768
12769 case MINUS:
12770 if (!TARGET_MACHO)
12771 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12772 output_pic_addr_const (file, XEXP (x, 0), code);
12773 putc ('-', file);
12774 output_pic_addr_const (file, XEXP (x, 1), code);
12775 if (!TARGET_MACHO)
12776 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12777 break;
12778
12779 case UNSPEC:
12780 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12781 {
12782 bool f = i386_asm_output_addr_const_extra (file, x);
12783 gcc_assert (f);
12784 break;
12785 }
12786
12787 gcc_assert (XVECLEN (x, 0) == 1);
12788 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12789 switch (XINT (x, 1))
12790 {
12791 case UNSPEC_GOT:
12792 fputs ("@GOT", file);
12793 break;
12794 case UNSPEC_GOTOFF:
12795 fputs ("@GOTOFF", file);
12796 break;
12797 case UNSPEC_PLTOFF:
12798 fputs ("@PLTOFF", file);
12799 break;
12800 case UNSPEC_PCREL:
12801 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12802 "(%rip)" : "[rip]", file);
12803 break;
12804 case UNSPEC_GOTPCREL:
12805 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12806 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12807 break;
12808 case UNSPEC_GOTTPOFF:
12809 /* FIXME: This might be @TPOFF in Sun ld too. */
12810 fputs ("@gottpoff", file);
12811 break;
12812 case UNSPEC_TPOFF:
12813 fputs ("@tpoff", file);
12814 break;
12815 case UNSPEC_NTPOFF:
12816 if (TARGET_64BIT)
12817 fputs ("@tpoff", file);
12818 else
12819 fputs ("@ntpoff", file);
12820 break;
12821 case UNSPEC_DTPOFF:
12822 fputs ("@dtpoff", file);
12823 break;
12824 case UNSPEC_GOTNTPOFF:
12825 if (TARGET_64BIT)
12826 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12827 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12828 else
12829 fputs ("@gotntpoff", file);
12830 break;
12831 case UNSPEC_INDNTPOFF:
12832 fputs ("@indntpoff", file);
12833 break;
12834 #if TARGET_MACHO
12835 case UNSPEC_MACHOPIC_OFFSET:
12836 putc ('-', file);
12837 machopic_output_function_base_name (file);
12838 break;
12839 #endif
12840 default:
12841 output_operand_lossage ("invalid UNSPEC as operand");
12842 break;
12843 }
12844 break;
12845
12846 default:
12847 output_operand_lossage ("invalid expression as operand");
12848 }
12849 }
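/* For example, given the rtx (const:SI (unspec:SI [(symbol_ref "foo")]
   UNSPEC_GOTOFF)) this routine prints "foo@GOTOFF"; in 64-bit AT&T output
   an UNSPEC_GOTPCREL operand comes out as "foo@GOTPCREL(%rip)".  */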
12850
12851 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12852 We need to emit DTP-relative relocations. */
12853
12854 static void ATTRIBUTE_UNUSED
12855 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12856 {
12857 fputs (ASM_LONG, file);
12858 output_addr_const (file, x);
12859 fputs ("@dtpoff", file);
12860 switch (size)
12861 {
12862 case 4:
12863 break;
12864 case 8:
12865 fputs (", 0", file);
12866 break;
12867 default:
12868 gcc_unreachable ();
12869 }
12870 }
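/* With the usual ASM_LONG definition this emits something like
   ".long foo@dtpoff" for SIZE 4 and ".long foo@dtpoff, 0" for SIZE 8,
   i.e. the 8-byte case is padded with a zero upper word.  */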
12871
12872 /* Return true if X is a representation of the PIC register. This copes
12873 with calls from ix86_find_base_term, where the register might have
12874 been replaced by a cselib value. */
12875
12876 static bool
12877 ix86_pic_register_p (rtx x)
12878 {
12879 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12880 return (pic_offset_table_rtx
12881 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12882 else
12883 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12884 }
12885
12886 /* Helper function for ix86_delegitimize_address.
12887 Attempt to delegitimize TLS local-exec accesses. */
12888
12889 static rtx
12890 ix86_delegitimize_tls_address (rtx orig_x)
12891 {
12892 rtx x = orig_x, unspec;
12893 struct ix86_address addr;
12894
12895 if (!TARGET_TLS_DIRECT_SEG_REFS)
12896 return orig_x;
12897 if (MEM_P (x))
12898 x = XEXP (x, 0);
12899 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12900 return orig_x;
12901 if (ix86_decompose_address (x, &addr) == 0
12902 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12903 || addr.disp == NULL_RTX
12904 || GET_CODE (addr.disp) != CONST)
12905 return orig_x;
12906 unspec = XEXP (addr.disp, 0);
12907 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12908 unspec = XEXP (unspec, 0);
12909 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12910 return orig_x;
12911 x = XVECEXP (unspec, 0, 0);
12912 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12913 if (unspec != XEXP (addr.disp, 0))
12914 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12915 if (addr.index)
12916 {
12917 rtx idx = addr.index;
12918 if (addr.scale != 1)
12919 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12920 x = gen_rtx_PLUS (Pmode, idx, x);
12921 }
12922 if (addr.base)
12923 x = gen_rtx_PLUS (Pmode, addr.base, x);
12924 if (MEM_P (orig_x))
12925 x = replace_equiv_address_nv (orig_x, x);
12926 return x;
12927 }
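/* For instance, a TLS local-exec access such as %gs:foo@ntpoff, whose
   address rtx is a SEG_GS/SEG_FS address with an UNSPEC_NTPOFF
   displacement, is mapped back to plain "foo" (plus any base, index and
   constant addend) so that debug output refers to the symbol itself.  */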
12928
12929 /* In the name of slightly smaller debug output, and to cater to
12930 general assembler lossage, recognize PIC+GOTOFF and turn it back
12931 into a direct symbol reference.
12932
12933 On Darwin, this is necessary to avoid a crash, because Darwin
12934 has a different PIC label for each routine but the DWARF debugging
12935 information is not associated with any particular routine, so it's
12936 necessary to remove references to the PIC label from RTL stored by
12937 the DWARF output code. */
12938
12939 static rtx
12940 ix86_delegitimize_address (rtx x)
12941 {
12942 rtx orig_x = delegitimize_mem_from_attrs (x);
12943 /* addend is NULL or some rtx if x is something+GOTOFF where
12944 something doesn't include the PIC register. */
12945 rtx addend = NULL_RTX;
12946 /* reg_addend is NULL or a multiple of some register. */
12947 rtx reg_addend = NULL_RTX;
12948 /* const_addend is NULL or a const_int. */
12949 rtx const_addend = NULL_RTX;
12950 /* This is the result, or NULL. */
12951 rtx result = NULL_RTX;
12952
12953 x = orig_x;
12954
12955 if (MEM_P (x))
12956 x = XEXP (x, 0);
12957
12958 if (TARGET_64BIT)
12959 {
12960 if (GET_CODE (x) != CONST
12961 || GET_CODE (XEXP (x, 0)) != UNSPEC
12962 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12963 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
12964 || !MEM_P (orig_x))
12965 return ix86_delegitimize_tls_address (orig_x);
12966 x = XVECEXP (XEXP (x, 0), 0, 0);
12967 if (GET_MODE (orig_x) != GET_MODE (x))
12968 {
12969 x = simplify_gen_subreg (GET_MODE (orig_x), x,
12970 GET_MODE (x), 0);
12971 if (x == NULL_RTX)
12972 return orig_x;
12973 }
12974 return x;
12975 }
12976
12977 if (GET_CODE (x) != PLUS
12978 || GET_CODE (XEXP (x, 1)) != CONST)
12979 return ix86_delegitimize_tls_address (orig_x);
12980
12981 if (ix86_pic_register_p (XEXP (x, 0)))
12982 /* %ebx + GOT/GOTOFF */
12983 ;
12984 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12985 {
12986 /* %ebx + %reg * scale + GOT/GOTOFF */
12987 reg_addend = XEXP (x, 0);
12988 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12989 reg_addend = XEXP (reg_addend, 1);
12990 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12991 reg_addend = XEXP (reg_addend, 0);
12992 else
12993 {
12994 reg_addend = NULL_RTX;
12995 addend = XEXP (x, 0);
12996 }
12997 }
12998 else
12999 addend = XEXP (x, 0);
13000
13001 x = XEXP (XEXP (x, 1), 0);
13002 if (GET_CODE (x) == PLUS
13003 && CONST_INT_P (XEXP (x, 1)))
13004 {
13005 const_addend = XEXP (x, 1);
13006 x = XEXP (x, 0);
13007 }
13008
13009 if (GET_CODE (x) == UNSPEC
13010 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13011 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13012 result = XVECEXP (x, 0, 0);
13013
13014 if (TARGET_MACHO && darwin_local_data_pic (x)
13015 && !MEM_P (orig_x))
13016 result = XVECEXP (x, 0, 0);
13017
13018 if (! result)
13019 return ix86_delegitimize_tls_address (orig_x);
13020
13021 if (const_addend)
13022 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13023 if (reg_addend)
13024 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13025 if (addend)
13026 {
13027 /* If the rest of original X doesn't involve the PIC register, add
13028 addend and subtract pic_offset_table_rtx. This can happen e.g.
13029 for code like:
13030 leal (%ebx, %ecx, 4), %ecx
13031 ...
13032 movl foo@GOTOFF(%ecx), %edx
13033 in which case we return (%ecx - %ebx) + foo. */
13034 if (pic_offset_table_rtx)
13035 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13036 pic_offset_table_rtx),
13037 result);
13038 else
13039 return orig_x;
13040 }
13041 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13042 {
13043 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13044 if (result == NULL_RTX)
13045 return orig_x;
13046 }
13047 return result;
13048 }
13049
13050 /* If X is a machine specific address (i.e. a symbol or label being
13051 referenced as a displacement from the GOT implemented using an
13052 UNSPEC), then return the base term. Otherwise return X. */
13053
13054 rtx
13055 ix86_find_base_term (rtx x)
13056 {
13057 rtx term;
13058
13059 if (TARGET_64BIT)
13060 {
13061 if (GET_CODE (x) != CONST)
13062 return x;
13063 term = XEXP (x, 0);
13064 if (GET_CODE (term) == PLUS
13065 && (CONST_INT_P (XEXP (term, 1))
13066 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13067 term = XEXP (term, 0);
13068 if (GET_CODE (term) != UNSPEC
13069 || (XINT (term, 1) != UNSPEC_GOTPCREL
13070 && XINT (term, 1) != UNSPEC_PCREL))
13071 return x;
13072
13073 return XVECEXP (term, 0, 0);
13074 }
13075
13076 return ix86_delegitimize_address (x);
13077 }
13078 \f
13079 static void
13080 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13081 int fp, FILE *file)
13082 {
13083 const char *suffix;
13084
13085 if (mode == CCFPmode || mode == CCFPUmode)
13086 {
13087 code = ix86_fp_compare_code_to_integer (code);
13088 mode = CCmode;
13089 }
13090 if (reverse)
13091 code = reverse_condition (code);
13092
13093 switch (code)
13094 {
13095 case EQ:
13096 switch (mode)
13097 {
13098 case CCAmode:
13099 suffix = "a";
13100 break;
13101
13102 case CCCmode:
13103 suffix = "c";
13104 break;
13105
13106 case CCOmode:
13107 suffix = "o";
13108 break;
13109
13110 case CCSmode:
13111 suffix = "s";
13112 break;
13113
13114 default:
13115 suffix = "e";
13116 }
13117 break;
13118 case NE:
13119 switch (mode)
13120 {
13121 case CCAmode:
13122 suffix = "na";
13123 break;
13124
13125 case CCCmode:
13126 suffix = "nc";
13127 break;
13128
13129 case CCOmode:
13130 suffix = "no";
13131 break;
13132
13133 case CCSmode:
13134 suffix = "ns";
13135 break;
13136
13137 default:
13138 suffix = "ne";
13139 }
13140 break;
13141 case GT:
13142 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13143 suffix = "g";
13144 break;
13145 case GTU:
13146 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13147 Those same assemblers have the same but opposite lossage on cmov. */
13148 if (mode == CCmode)
13149 suffix = fp ? "nbe" : "a";
13150 else if (mode == CCCmode)
13151 suffix = "b";
13152 else
13153 gcc_unreachable ();
13154 break;
13155 case LT:
13156 switch (mode)
13157 {
13158 case CCNOmode:
13159 case CCGOCmode:
13160 suffix = "s";
13161 break;
13162
13163 case CCmode:
13164 case CCGCmode:
13165 suffix = "l";
13166 break;
13167
13168 default:
13169 gcc_unreachable ();
13170 }
13171 break;
13172 case LTU:
13173 gcc_assert (mode == CCmode || mode == CCCmode);
13174 suffix = "b";
13175 break;
13176 case GE:
13177 switch (mode)
13178 {
13179 case CCNOmode:
13180 case CCGOCmode:
13181 suffix = "ns";
13182 break;
13183
13184 case CCmode:
13185 case CCGCmode:
13186 suffix = "ge";
13187 break;
13188
13189 default:
13190 gcc_unreachable ();
13191 }
13192 break;
13193 case GEU:
13194 /* ??? As above. */
13195 gcc_assert (mode == CCmode || mode == CCCmode);
13196 suffix = fp ? "nb" : "ae";
13197 break;
13198 case LE:
13199 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13200 suffix = "le";
13201 break;
13202 case LEU:
13203 /* ??? As above. */
13204 if (mode == CCmode)
13205 suffix = "be";
13206 else if (mode == CCCmode)
13207 suffix = fp ? "nb" : "ae";
13208 else
13209 gcc_unreachable ();
13210 break;
13211 case UNORDERED:
13212 suffix = fp ? "u" : "p";
13213 break;
13214 case ORDERED:
13215 suffix = fp ? "nu" : "np";
13216 break;
13217 default:
13218 gcc_unreachable ();
13219 }
13220 fputs (suffix, file);
13221 }
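/* For example, (GT, CCGCmode) prints "g" and (LTU, CCmode) prints "b",
   so a template using %C yields "setg", "cmovb" and the like; with
   REVERSE set (the 'c' and 'f' operand codes) the condition is inverted
   before the suffix is chosen.  */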
13222
13223 /* Print the name of register X to FILE based on its machine mode and number.
13224 If CODE is 'w', pretend the mode is HImode.
13225 If CODE is 'b', pretend the mode is QImode.
13226 If CODE is 'k', pretend the mode is SImode.
13227 If CODE is 'q', pretend the mode is DImode.
13228 If CODE is 'x', pretend the mode is V4SFmode.
13229 If CODE is 't', pretend the mode is V8SFmode.
13230 If CODE is 'h', pretend the reg is the 'high' byte register.
13231 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack register.
13232 If CODE is 'd', duplicate the operand for an AVX instruction.
13233 */
13234
13235 void
13236 print_reg (rtx x, int code, FILE *file)
13237 {
13238 const char *reg;
13239 bool duplicated = code == 'd' && TARGET_AVX;
13240
13241 gcc_assert (x == pc_rtx
13242 || (REGNO (x) != ARG_POINTER_REGNUM
13243 && REGNO (x) != FRAME_POINTER_REGNUM
13244 && REGNO (x) != FLAGS_REG
13245 && REGNO (x) != FPSR_REG
13246 && REGNO (x) != FPCR_REG));
13247
13248 if (ASSEMBLER_DIALECT == ASM_ATT)
13249 putc ('%', file);
13250
13251 if (x == pc_rtx)
13252 {
13253 gcc_assert (TARGET_64BIT);
13254 fputs ("rip", file);
13255 return;
13256 }
13257
13258 if (code == 'w' || MMX_REG_P (x))
13259 code = 2;
13260 else if (code == 'b')
13261 code = 1;
13262 else if (code == 'k')
13263 code = 4;
13264 else if (code == 'q')
13265 code = 8;
13266 else if (code == 'y')
13267 code = 3;
13268 else if (code == 'h')
13269 code = 0;
13270 else if (code == 'x')
13271 code = 16;
13272 else if (code == 't')
13273 code = 32;
13274 else
13275 code = GET_MODE_SIZE (GET_MODE (x));
13276
13277 /* Irritatingly, the AMD extended registers use a different naming
13278 convention from the normal registers. */
13279 if (REX_INT_REG_P (x))
13280 {
13281 gcc_assert (TARGET_64BIT);
13282 switch (code)
13283 {
13284 case 0:
13285 error ("extended registers have no high halves");
13286 break;
13287 case 1:
13288 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13289 break;
13290 case 2:
13291 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13292 break;
13293 case 4:
13294 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13295 break;
13296 case 8:
13297 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13298 break;
13299 default:
13300 error ("unsupported operand size for extended register");
13301 break;
13302 }
13303 return;
13304 }
13305
13306 reg = NULL;
13307 switch (code)
13308 {
13309 case 3:
13310 if (STACK_TOP_P (x))
13311 {
13312 reg = "st(0)";
13313 break;
13314 }
13315 /* FALLTHRU */
13316 case 8:
13317 case 4:
13318 case 12:
13319 if (! ANY_FP_REG_P (x))
13320 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13321 /* FALLTHRU */
13322 case 16:
13323 case 2:
13324 normal:
13325 reg = hi_reg_name[REGNO (x)];
13326 break;
13327 case 1:
13328 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13329 goto normal;
13330 reg = qi_reg_name[REGNO (x)];
13331 break;
13332 case 0:
13333 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13334 goto normal;
13335 reg = qi_high_reg_name[REGNO (x)];
13336 break;
13337 case 32:
13338 if (SSE_REG_P (x))
13339 {
13340 gcc_assert (!duplicated);
13341 putc ('y', file);
13342 fputs (hi_reg_name[REGNO (x)] + 1, file);
13343 return;
13344 }
13345 break;
13346 default:
13347 gcc_unreachable ();
13348 }
13349
13350 fputs (reg, file);
13351 if (duplicated)
13352 {
13353 if (ASSEMBLER_DIALECT == ASM_ATT)
13354 fprintf (file, ", %%%s", reg);
13355 else
13356 fprintf (file, ", %s", reg);
13357 }
13358 }
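/* A few sample outputs, assuming AT&T syntax: for hard register 0 (ax),
   code 'b' prints %al, 'w' prints %ax, 'k' prints %eax, 'q' prints %rax
   (64-bit only) and 'h' prints %ah; for an extended register such as r10,
   'b'/'w'/'k'/'q' print %r10b, %r10w, %r10d and %r10 respectively.  */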
13359
13360 /* Locate some local-dynamic symbol still in use by this function
13361 so that we can print its name in some tls_local_dynamic_base
13362 pattern. */
13363
13364 static int
13365 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13366 {
13367 rtx x = *px;
13368
13369 if (GET_CODE (x) == SYMBOL_REF
13370 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13371 {
13372 cfun->machine->some_ld_name = XSTR (x, 0);
13373 return 1;
13374 }
13375
13376 return 0;
13377 }
13378
13379 static const char *
13380 get_some_local_dynamic_name (void)
13381 {
13382 rtx insn;
13383
13384 if (cfun->machine->some_ld_name)
13385 return cfun->machine->some_ld_name;
13386
13387 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13388 if (NONDEBUG_INSN_P (insn)
13389 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13390 return cfun->machine->some_ld_name;
13391
13392 return NULL;
13393 }
13394
13395 /* Meaning of CODE:
13396 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13397 C -- print opcode suffix for set/cmov insn.
13398 c -- like C, but print reversed condition
13399 F,f -- likewise, but for floating-point.
13400 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13401 otherwise nothing
13402 R -- print the prefix for register names.
13403 z -- print the opcode suffix for the size of the current operand.
13404 Z -- likewise, with special suffixes for x87 instructions.
13405 * -- print a star (in certain assembler syntax)
13406 A -- print an absolute memory reference.
13407 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13408 s -- print a shift double count, followed by the assembler's argument
13409 delimiter.
13410 b -- print the QImode name of the register for the indicated operand.
13411 %b0 would print %al if operands[0] is reg 0.
13412 w -- likewise, print the HImode name of the register.
13413 k -- likewise, print the SImode name of the register.
13414 q -- likewise, print the DImode name of the register.
13415 x -- likewise, print the V4SFmode name of the register.
13416 t -- likewise, print the V8SFmode name of the register.
13417 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13418 y -- print "st(0)" instead of "st" as a register.
13419 d -- print duplicated register operand for AVX instruction.
13420 D -- print condition for SSE cmp instruction.
13421 P -- if PIC, print an @PLT suffix.
13422 p -- print raw symbol name.
13423 X -- don't print any sort of PIC '@' suffix for a symbol.
13424 & -- print some in-use local-dynamic symbol name.
13425 H -- print a memory address offset by 8; used for sse high-parts
13426 Y -- print condition for XOP pcom* instruction.
13427 + -- print a branch hint as 'cs' or 'ds' prefix
13428 ; -- print a semicolon (after prefixes, due to a bug in older gas).
13429 @ -- print a segment register of thread base pointer load
13430 */
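/* For instance, in an AT&T-style output template "%z0" appends the size
   suffix taken from operand 0's mode ('b', 'w', 'l' or 'q'), so an SImode
   operand turns "add%z0" into "addl"; under Intel syntax the 'z' code
   prints nothing, since the operand size is carried by the PTR qualifier
   on the memory operand instead.  */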
13431
13432 void
13433 ix86_print_operand (FILE *file, rtx x, int code)
13434 {
13435 if (code)
13436 {
13437 switch (code)
13438 {
13439 case '*':
13440 if (ASSEMBLER_DIALECT == ASM_ATT)
13441 putc ('*', file);
13442 return;
13443
13444 case '&':
13445 {
13446 const char *name = get_some_local_dynamic_name ();
13447 if (name == NULL)
13448 output_operand_lossage ("'%%&' used without any "
13449 "local dynamic TLS references");
13450 else
13451 assemble_name (file, name);
13452 return;
13453 }
13454
13455 case 'A':
13456 switch (ASSEMBLER_DIALECT)
13457 {
13458 case ASM_ATT:
13459 putc ('*', file);
13460 break;
13461
13462 case ASM_INTEL:
13463 /* Intel syntax. For absolute addresses, registers should not
13464 be surrounded by braces. */
13465 if (!REG_P (x))
13466 {
13467 putc ('[', file);
13468 ix86_print_operand (file, x, 0);
13469 putc (']', file);
13470 return;
13471 }
13472 break;
13473
13474 default:
13475 gcc_unreachable ();
13476 }
13477
13478 ix86_print_operand (file, x, 0);
13479 return;
13480
13481
13482 case 'L':
13483 if (ASSEMBLER_DIALECT == ASM_ATT)
13484 putc ('l', file);
13485 return;
13486
13487 case 'W':
13488 if (ASSEMBLER_DIALECT == ASM_ATT)
13489 putc ('w', file);
13490 return;
13491
13492 case 'B':
13493 if (ASSEMBLER_DIALECT == ASM_ATT)
13494 putc ('b', file);
13495 return;
13496
13497 case 'Q':
13498 if (ASSEMBLER_DIALECT == ASM_ATT)
13499 putc ('l', file);
13500 return;
13501
13502 case 'S':
13503 if (ASSEMBLER_DIALECT == ASM_ATT)
13504 putc ('s', file);
13505 return;
13506
13507 case 'T':
13508 if (ASSEMBLER_DIALECT == ASM_ATT)
13509 putc ('t', file);
13510 return;
13511
13512 case 'z':
13513 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13514 {
13515 /* Opcodes don't get size suffixes when using Intel syntax. */
13516 if (ASSEMBLER_DIALECT == ASM_INTEL)
13517 return;
13518
13519 switch (GET_MODE_SIZE (GET_MODE (x)))
13520 {
13521 case 1:
13522 putc ('b', file);
13523 return;
13524
13525 case 2:
13526 putc ('w', file);
13527 return;
13528
13529 case 4:
13530 putc ('l', file);
13531 return;
13532
13533 case 8:
13534 putc ('q', file);
13535 return;
13536
13537 default:
13538 output_operand_lossage
13539 ("invalid operand size for operand code '%c'", code);
13540 return;
13541 }
13542 }
13543
13544 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13545 warning
13546 (0, "non-integer operand used with operand code '%c'", code);
13547 /* FALLTHRU */
13548
13549 case 'Z':
13550 /* 387 opcodes don't get size suffixes when using Intel syntax. */
13551 if (ASSEMBLER_DIALECT == ASM_INTEL)
13552 return;
13553
13554 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13555 {
13556 switch (GET_MODE_SIZE (GET_MODE (x)))
13557 {
13558 case 2:
13559 #ifdef HAVE_AS_IX86_FILDS
13560 putc ('s', file);
13561 #endif
13562 return;
13563
13564 case 4:
13565 putc ('l', file);
13566 return;
13567
13568 case 8:
13569 #ifdef HAVE_AS_IX86_FILDQ
13570 putc ('q', file);
13571 #else
13572 fputs ("ll", file);
13573 #endif
13574 return;
13575
13576 default:
13577 break;
13578 }
13579 }
13580 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13581 {
13582 /* 387 opcodes don't get size suffixes
13583 if the operands are registers. */
13584 if (STACK_REG_P (x))
13585 return;
13586
13587 switch (GET_MODE_SIZE (GET_MODE (x)))
13588 {
13589 case 4:
13590 putc ('s', file);
13591 return;
13592
13593 case 8:
13594 putc ('l', file);
13595 return;
13596
13597 case 12:
13598 case 16:
13599 putc ('t', file);
13600 return;
13601
13602 default:
13603 break;
13604 }
13605 }
13606 else
13607 {
13608 output_operand_lossage
13609 ("invalid operand type used with operand code '%c'", code);
13610 return;
13611 }
13612
13613 output_operand_lossage
13614 ("invalid operand size for operand code '%c'", code);
13615 return;
13616
13617 case 'd':
13618 case 'b':
13619 case 'w':
13620 case 'k':
13621 case 'q':
13622 case 'h':
13623 case 't':
13624 case 'y':
13625 case 'x':
13626 case 'X':
13627 case 'P':
13628 case 'p':
13629 break;
13630
13631 case 's':
13632 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13633 {
13634 ix86_print_operand (file, x, 0);
13635 fputs (", ", file);
13636 }
13637 return;
13638
13639 case 'D':
13640 /* Little bit of braindamage here. The SSE compare instructions
13641 use completely different names for the comparisons than the
13642 fp conditional moves do. */
13643 if (TARGET_AVX)
13644 {
13645 switch (GET_CODE (x))
13646 {
13647 case EQ:
13648 fputs ("eq", file);
13649 break;
13650 case UNEQ:
13651 fputs ("eq_us", file);
13652 break;
13653 case LT:
13654 fputs ("lt", file);
13655 break;
13656 case UNLT:
13657 fputs ("nge", file);
13658 break;
13659 case LE:
13660 fputs ("le", file);
13661 break;
13662 case UNLE:
13663 fputs ("ngt", file);
13664 break;
13665 case UNORDERED:
13666 fputs ("unord", file);
13667 break;
13668 case NE:
13669 fputs ("neq", file);
13670 break;
13671 case LTGT:
13672 fputs ("neq_oq", file);
13673 break;
13674 case GE:
13675 fputs ("ge", file);
13676 break;
13677 case UNGE:
13678 fputs ("nlt", file);
13679 break;
13680 case GT:
13681 fputs ("gt", file);
13682 break;
13683 case UNGT:
13684 fputs ("nle", file);
13685 break;
13686 case ORDERED:
13687 fputs ("ord", file);
13688 break;
13689 default:
13690 output_operand_lossage ("operand is not a condition code, "
13691 "invalid operand code 'D'");
13692 return;
13693 }
13694 }
13695 else
13696 {
13697 switch (GET_CODE (x))
13698 {
13699 case EQ:
13700 case UNEQ:
13701 fputs ("eq", file);
13702 break;
13703 case LT:
13704 case UNLT:
13705 fputs ("lt", file);
13706 break;
13707 case LE:
13708 case UNLE:
13709 fputs ("le", file);
13710 break;
13711 case UNORDERED:
13712 fputs ("unord", file);
13713 break;
13714 case NE:
13715 case LTGT:
13716 fputs ("neq", file);
13717 break;
13718 case UNGE:
13719 case GE:
13720 fputs ("nlt", file);
13721 break;
13722 case UNGT:
13723 case GT:
13724 fputs ("nle", file);
13725 break;
13726 case ORDERED:
13727 fputs ("ord", file);
13728 break;
13729 default:
13730 output_operand_lossage ("operand is not a condition code, "
13731 "invalid operand code 'D'");
13732 return;
13733 }
13734 }
13735 return;
13736 case 'O':
13737 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13738 if (ASSEMBLER_DIALECT == ASM_ATT)
13739 {
13740 switch (GET_MODE (x))
13741 {
13742 case HImode: putc ('w', file); break;
13743 case SImode:
13744 case SFmode: putc ('l', file); break;
13745 case DImode:
13746 case DFmode: putc ('q', file); break;
13747 default: gcc_unreachable ();
13748 }
13749 putc ('.', file);
13750 }
13751 #endif
13752 return;
13753 case 'C':
13754 if (!COMPARISON_P (x))
13755 {
13756 output_operand_lossage ("operand is neither a constant nor a "
13757 "condition code, invalid operand code "
13758 "'C'");
13759 return;
13760 }
13761 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13762 return;
13763 case 'F':
13764 if (!COMPARISON_P (x))
13765 {
13766 output_operand_lossage ("operand is neither a constant nor a "
13767 "condition code, invalid operand code "
13768 "'F'");
13769 return;
13770 }
13771 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13772 if (ASSEMBLER_DIALECT == ASM_ATT)
13773 putc ('.', file);
13774 #endif
13775 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13776 return;
13777
13778 /* Like above, but reverse condition */
13779 case 'c':
13780 /* Check to see if argument to %c is really a constant
13781 and not a condition code which needs to be reversed. */
13782 if (!COMPARISON_P (x))
13783 {
13784 output_operand_lossage ("operand is neither a constant nor a "
13785 "condition code, invalid operand "
13786 "code 'c'");
13787 return;
13788 }
13789 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13790 return;
13791 case 'f':
13792 if (!COMPARISON_P (x))
13793 {
13794 output_operand_lossage ("operand is neither a constant nor a "
13795 "condition code, invalid operand "
13796 "code 'f'");
13797 return;
13798 }
13799 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13800 if (ASSEMBLER_DIALECT == ASM_ATT)
13801 putc ('.', file);
13802 #endif
13803 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13804 return;
13805
13806 case 'H':
13807 /* It doesn't actually matter what mode we use here, as we're
13808 only going to use this for printing. */
13809 x = adjust_address_nv (x, DImode, 8);
13810 break;
13811
13812 case '+':
13813 {
13814 rtx x;
13815
13816 if (!optimize
13817 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13818 return;
13819
13820 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13821 if (x)
13822 {
13823 int pred_val = INTVAL (XEXP (x, 0));
13824
13825 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13826 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13827 {
13828 int taken = pred_val > REG_BR_PROB_BASE / 2;
13829 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13830
13831 /* Emit hints only when the default branch prediction
13832 heuristics would fail. */
13833 if (taken != cputaken)
13834 {
13835 /* We use 3e (DS) prefix for taken branches and
13836 2e (CS) prefix for not taken branches. */
13837 if (taken)
13838 fputs ("ds ; ", file);
13839 else
13840 fputs ("cs ; ", file);
13841 }
13842 }
13843 }
13844 return;
13845 }
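/* So, when the profile disagrees with the CPU's static rule (backward
   branches taken, forward branches not taken), the output might look like
   "ds ; jne .L5" for a predicted-taken branch or "cs ; jne .L5" for a
   predicted-not-taken one; the %+ code is typically placed at the start
   of the conditional-jump templates in i386.md.  */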
13846
13847 case 'Y':
13848 switch (GET_CODE (x))
13849 {
13850 case NE:
13851 fputs ("neq", file);
13852 break;
13853 case EQ:
13854 fputs ("eq", file);
13855 break;
13856 case GE:
13857 case GEU:
13858 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13859 break;
13860 case GT:
13861 case GTU:
13862 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13863 break;
13864 case LE:
13865 case LEU:
13866 fputs ("le", file);
13867 break;
13868 case LT:
13869 case LTU:
13870 fputs ("lt", file);
13871 break;
13872 case UNORDERED:
13873 fputs ("unord", file);
13874 break;
13875 case ORDERED:
13876 fputs ("ord", file);
13877 break;
13878 case UNEQ:
13879 fputs ("ueq", file);
13880 break;
13881 case UNGE:
13882 fputs ("nlt", file);
13883 break;
13884 case UNGT:
13885 fputs ("nle", file);
13886 break;
13887 case UNLE:
13888 fputs ("ule", file);
13889 break;
13890 case UNLT:
13891 fputs ("ult", file);
13892 break;
13893 case LTGT:
13894 fputs ("une", file);
13895 break;
13896 default:
13897 output_operand_lossage ("operand is not a condition code, "
13898 "invalid operand code 'Y'");
13899 return;
13900 }
13901 return;
13902
13903 case ';':
13904 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13905 putc (';', file);
13906 #endif
13907 return;
13908
13909 case '@':
13910 if (ASSEMBLER_DIALECT == ASM_ATT)
13911 putc ('%', file);
13912
13913 /* The kernel uses a different segment register for performance
13914 reasons; a system call would not have to trash the userspace
13915 segment register, which would be expensive. */
13916 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13917 fputs ("fs", file);
13918 else
13919 fputs ("gs", file);
13920 return;
13921
13922 default:
13923 output_operand_lossage ("invalid operand code '%c'", code);
13924 }
13925 }
13926
13927 if (REG_P (x))
13928 print_reg (x, code, file);
13929
13930 else if (MEM_P (x))
13931 {
13932 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13933 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13934 && GET_MODE (x) != BLKmode)
13935 {
13936 const char * size;
13937 switch (GET_MODE_SIZE (GET_MODE (x)))
13938 {
13939 case 1: size = "BYTE"; break;
13940 case 2: size = "WORD"; break;
13941 case 4: size = "DWORD"; break;
13942 case 8: size = "QWORD"; break;
13943 case 12: size = "TBYTE"; break;
13944 case 16:
13945 if (GET_MODE (x) == XFmode)
13946 size = "TBYTE";
13947 else
13948 size = "XMMWORD";
13949 break;
13950 case 32: size = "YMMWORD"; break;
13951 default:
13952 gcc_unreachable ();
13953 }
13954
13955 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13956 if (code == 'b')
13957 size = "BYTE";
13958 else if (code == 'w')
13959 size = "WORD";
13960 else if (code == 'k')
13961 size = "DWORD";
13962
13963 fputs (size, file);
13964 fputs (" PTR ", file);
13965 }
13966
13967 x = XEXP (x, 0);
13968 /* Avoid (%rip) for call operands. */
13969 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13970 && !CONST_INT_P (x))
13971 output_addr_const (file, x);
13972 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13973 output_operand_lossage ("invalid constraints for operand");
13974 else
13975 output_address (x);
13976 }
13977
13978 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
13979 {
13980 REAL_VALUE_TYPE r;
13981 long l;
13982
13983 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13984 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13985
13986 if (ASSEMBLER_DIALECT == ASM_ATT)
13987 putc ('$', file);
13988 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13989 if (code == 'q')
13990 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13991 else
13992 fprintf (file, "0x%08x", (unsigned int) l);
13993 }
13994
13995 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
13996 {
13997 REAL_VALUE_TYPE r;
13998 long l[2];
13999
14000 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14001 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14002
14003 if (ASSEMBLER_DIALECT == ASM_ATT)
14004 putc ('$', file);
14005 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14006 }
14007
14008 /* These float cases don't actually occur as immediate operands. */
14009 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14010 {
14011 char dstr[30];
14012
14013 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14014 fputs (dstr, file);
14015 }
14016
14017 else
14018 {
14019 /* We have patterns that allow zero sets of memory, for instance.
14020 In 64-bit mode, we should probably support all 8-byte vectors,
14021 since we can in fact encode that into an immediate. */
14022 if (GET_CODE (x) == CONST_VECTOR)
14023 {
14024 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14025 x = const0_rtx;
14026 }
14027
14028 if (code != 'P' && code != 'p')
14029 {
14030 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14031 {
14032 if (ASSEMBLER_DIALECT == ASM_ATT)
14033 putc ('$', file);
14034 }
14035 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14036 || GET_CODE (x) == LABEL_REF)
14037 {
14038 if (ASSEMBLER_DIALECT == ASM_ATT)
14039 putc ('$', file);
14040 else
14041 fputs ("OFFSET FLAT:", file);
14042 }
14043 }
14044 if (CONST_INT_P (x))
14045 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14046 else if (flag_pic || MACHOPIC_INDIRECT)
14047 output_pic_addr_const (file, x, code);
14048 else
14049 output_addr_const (file, x);
14050 }
14051 }
14052
14053 static bool
14054 ix86_print_operand_punct_valid_p (unsigned char code)
14055 {
14056 return (code == '@' || code == '*' || code == '+'
14057 || code == '&' || code == ';');
14058 }
14059 \f
14060 /* Print a memory operand whose address is ADDR. */
14061
14062 static void
14063 ix86_print_operand_address (FILE *file, rtx addr)
14064 {
14065 struct ix86_address parts;
14066 rtx base, index, disp;
14067 int scale;
14068 int ok = ix86_decompose_address (addr, &parts);
14069
14070 gcc_assert (ok);
14071
14072 base = parts.base;
14073 index = parts.index;
14074 disp = parts.disp;
14075 scale = parts.scale;
14076
14077 switch (parts.seg)
14078 {
14079 case SEG_DEFAULT:
14080 break;
14081 case SEG_FS:
14082 case SEG_GS:
14083 if (ASSEMBLER_DIALECT == ASM_ATT)
14084 putc ('%', file);
14085 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14086 break;
14087 default:
14088 gcc_unreachable ();
14089 }
14090
14091 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14092 if (TARGET_64BIT && !base && !index)
14093 {
14094 rtx symbol = disp;
14095
14096 if (GET_CODE (disp) == CONST
14097 && GET_CODE (XEXP (disp, 0)) == PLUS
14098 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14099 symbol = XEXP (XEXP (disp, 0), 0);
14100
14101 if (GET_CODE (symbol) == LABEL_REF
14102 || (GET_CODE (symbol) == SYMBOL_REF
14103 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14104 base = pc_rtx;
14105 }
14106 if (!base && !index)
14107 {
14108 /* A displacement-only address requires special attention. */
14109
14110 if (CONST_INT_P (disp))
14111 {
14112 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14113 fputs ("ds:", file);
14114 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14115 }
14116 else if (flag_pic)
14117 output_pic_addr_const (file, disp, 0);
14118 else
14119 output_addr_const (file, disp);
14120 }
14121 else
14122 {
14123 /* Print DImode registers on 64bit targets to avoid addr32 prefixes. */
14124 int code = TARGET_64BIT ? 'q' : 0;
14125
14126 if (ASSEMBLER_DIALECT == ASM_ATT)
14127 {
14128 if (disp)
14129 {
14130 if (flag_pic)
14131 output_pic_addr_const (file, disp, 0);
14132 else if (GET_CODE (disp) == LABEL_REF)
14133 output_asm_label (disp);
14134 else
14135 output_addr_const (file, disp);
14136 }
14137
14138 putc ('(', file);
14139 if (base)
14140 print_reg (base, code, file);
14141 if (index)
14142 {
14143 putc (',', file);
14144 print_reg (index, code, file);
14145 if (scale != 1)
14146 fprintf (file, ",%d", scale);
14147 }
14148 putc (')', file);
14149 }
14150 else
14151 {
14152 rtx offset = NULL_RTX;
14153
14154 if (disp)
14155 {
14156 /* Pull out the offset of a symbol; print any symbol itself. */
14157 if (GET_CODE (disp) == CONST
14158 && GET_CODE (XEXP (disp, 0)) == PLUS
14159 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14160 {
14161 offset = XEXP (XEXP (disp, 0), 1);
14162 disp = gen_rtx_CONST (VOIDmode,
14163 XEXP (XEXP (disp, 0), 0));
14164 }
14165
14166 if (flag_pic)
14167 output_pic_addr_const (file, disp, 0);
14168 else if (GET_CODE (disp) == LABEL_REF)
14169 output_asm_label (disp);
14170 else if (CONST_INT_P (disp))
14171 offset = disp;
14172 else
14173 output_addr_const (file, disp);
14174 }
14175
14176 putc ('[', file);
14177 if (base)
14178 {
14179 print_reg (base, code, file);
14180 if (offset)
14181 {
14182 if (INTVAL (offset) >= 0)
14183 putc ('+', file);
14184 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14185 }
14186 }
14187 else if (offset)
14188 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14189 else
14190 putc ('0', file);
14191
14192 if (index)
14193 {
14194 putc ('+', file);
14195 print_reg (index, code, file);
14196 if (scale != 1)
14197 fprintf (file, "*%d", scale);
14198 }
14199 putc (']', file);
14200 }
14201 }
14202 }
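/* Example: an address with base %ebx, index %ecx, scale 4 and
   displacement 16 is printed as "16(%ebx,%ecx,4)" in AT&T syntax and as
   "[ebx+ecx*4+16]" in Intel syntax; a %fs/%gs segment prefix, if any,
   is emitted first.  */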
14203
14204 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14205
14206 static bool
14207 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14208 {
14209 rtx op;
14210
14211 if (GET_CODE (x) != UNSPEC)
14212 return false;
14213
14214 op = XVECEXP (x, 0, 0);
14215 switch (XINT (x, 1))
14216 {
14217 case UNSPEC_GOTTPOFF:
14218 output_addr_const (file, op);
14219 /* FIXME: This might be @TPOFF in Sun ld. */
14220 fputs ("@gottpoff", file);
14221 break;
14222 case UNSPEC_TPOFF:
14223 output_addr_const (file, op);
14224 fputs ("@tpoff", file);
14225 break;
14226 case UNSPEC_NTPOFF:
14227 output_addr_const (file, op);
14228 if (TARGET_64BIT)
14229 fputs ("@tpoff", file);
14230 else
14231 fputs ("@ntpoff", file);
14232 break;
14233 case UNSPEC_DTPOFF:
14234 output_addr_const (file, op);
14235 fputs ("@dtpoff", file);
14236 break;
14237 case UNSPEC_GOTNTPOFF:
14238 output_addr_const (file, op);
14239 if (TARGET_64BIT)
14240 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14241 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14242 else
14243 fputs ("@gotntpoff", file);
14244 break;
14245 case UNSPEC_INDNTPOFF:
14246 output_addr_const (file, op);
14247 fputs ("@indntpoff", file);
14248 break;
14249 #if TARGET_MACHO
14250 case UNSPEC_MACHOPIC_OFFSET:
14251 output_addr_const (file, op);
14252 putc ('-', file);
14253 machopic_output_function_base_name (file);
14254 break;
14255 #endif
14256
14257 case UNSPEC_STACK_CHECK:
14258 {
14259 int offset;
14260
14261 gcc_assert (flag_split_stack);
14262
14263 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14264 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14265 #else
14266 gcc_unreachable ();
14267 #endif
14268
14269 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14270 }
14271 break;
14272
14273 default:
14274 return false;
14275 }
14276
14277 return true;
14278 }
14279 \f
14280 /* Split one or more double-mode RTL references into pairs of half-mode
14281 references. The RTL can be REG, offsettable MEM, integer constant, or
14282 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14283 split and "num" is its length. lo_half and hi_half are output arrays
14284 that parallel "operands". */
14285
14286 void
14287 split_double_mode (enum machine_mode mode, rtx operands[],
14288 int num, rtx lo_half[], rtx hi_half[])
14289 {
14290 enum machine_mode half_mode;
14291 unsigned int byte;
14292
14293 switch (mode)
14294 {
14295 case TImode:
14296 half_mode = DImode;
14297 break;
14298 case DImode:
14299 half_mode = SImode;
14300 break;
14301 default:
14302 gcc_unreachable ();
14303 }
14304
14305 byte = GET_MODE_SIZE (half_mode);
14306
14307 while (num--)
14308 {
14309 rtx op = operands[num];
14310
14311 /* simplify_subreg refuses to split volatile memory references,
14312 but we still have to handle them. */
14313 if (MEM_P (op))
14314 {
14315 lo_half[num] = adjust_address (op, half_mode, 0);
14316 hi_half[num] = adjust_address (op, half_mode, byte);
14317 }
14318 else
14319 {
14320 lo_half[num] = simplify_gen_subreg (half_mode, op,
14321 GET_MODE (op) == VOIDmode
14322 ? mode : GET_MODE (op), 0);
14323 hi_half[num] = simplify_gen_subreg (half_mode, op,
14324 GET_MODE (op) == VOIDmode
14325 ? mode : GET_MODE (op), byte);
14326 }
14327 }
14328 }
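/* For instance, splitting a DImode operand on a 32-bit target yields two
   SImode halves: for a MEM they are the same address at byte offsets 0
   and 4, for a REG or constant they are obtained via simplify_gen_subreg,
   with lo_half[] receiving the least-significant word on this
   little-endian target.  */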
14329 \f
14330 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14331 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14332 is the expression of the binary operation. The output may either be
14333 emitted here, or returned to the caller, like all output_* functions.
14334
14335 There is no guarantee that the operands are the same mode, as they
14336 might be within FLOAT or FLOAT_EXTEND expressions. */
14337
14338 #ifndef SYSV386_COMPAT
14339 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14340 wants to fix the assemblers because that causes incompatibility
14341 with gcc. No-one wants to fix gcc because that causes
14342 incompatibility with assemblers... You can use the option of
14343 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14344 #define SYSV386_COMPAT 1
14345 #endif
14346
14347 const char *
14348 output_387_binary_op (rtx insn, rtx *operands)
14349 {
14350 static char buf[40];
14351 const char *p;
14352 const char *ssep;
14353 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14354
14355 #ifdef ENABLE_CHECKING
14356 /* Even if we do not want to check the inputs, this documents input
14357 constraints, which helps in understanding the following code. */
14358 if (STACK_REG_P (operands[0])
14359 && ((REG_P (operands[1])
14360 && REGNO (operands[0]) == REGNO (operands[1])
14361 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14362 || (REG_P (operands[2])
14363 && REGNO (operands[0]) == REGNO (operands[2])
14364 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14365 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14366 ; /* ok */
14367 else
14368 gcc_assert (is_sse);
14369 #endif
14370
14371 switch (GET_CODE (operands[3]))
14372 {
14373 case PLUS:
14374 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14375 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14376 p = "fiadd";
14377 else
14378 p = "fadd";
14379 ssep = "vadd";
14380 break;
14381
14382 case MINUS:
14383 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14384 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14385 p = "fisub";
14386 else
14387 p = "fsub";
14388 ssep = "vsub";
14389 break;
14390
14391 case MULT:
14392 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14393 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14394 p = "fimul";
14395 else
14396 p = "fmul";
14397 ssep = "vmul";
14398 break;
14399
14400 case DIV:
14401 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14402 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14403 p = "fidiv";
14404 else
14405 p = "fdiv";
14406 ssep = "vdiv";
14407 break;
14408
14409 default:
14410 gcc_unreachable ();
14411 }
14412
14413 if (is_sse)
14414 {
14415 if (TARGET_AVX)
14416 {
14417 strcpy (buf, ssep);
14418 if (GET_MODE (operands[0]) == SFmode)
14419 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14420 else
14421 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14422 }
14423 else
14424 {
14425 strcpy (buf, ssep + 1);
14426 if (GET_MODE (operands[0]) == SFmode)
14427 strcat (buf, "ss\t{%2, %0|%0, %2}");
14428 else
14429 strcat (buf, "sd\t{%2, %0|%0, %2}");
14430 }
14431 return buf;
14432 }
14433 strcpy (buf, p);
14434
14435 switch (GET_CODE (operands[3]))
14436 {
14437 case MULT:
14438 case PLUS:
14439 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14440 {
14441 rtx temp = operands[2];
14442 operands[2] = operands[1];
14443 operands[1] = temp;
14444 }
14445
14446 /* We know operands[0] == operands[1]. */
14447
14448 if (MEM_P (operands[2]))
14449 {
14450 p = "%Z2\t%2";
14451 break;
14452 }
14453
14454 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14455 {
14456 if (STACK_TOP_P (operands[0]))
14457 /* How is it that we are storing to a dead operand[2]?
14458 Well, presumably operands[1] is dead too. We can't
14459 store the result to st(0) as st(0) gets popped on this
14460 instruction. Instead store to operands[2] (which I
14461 think has to be st(1)). st(1) will be popped later.
14462 gcc <= 2.8.1 didn't have this check and generated
14463 assembly code that the Unixware assembler rejected. */
14464 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14465 else
14466 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14467 break;
14468 }
14469
14470 if (STACK_TOP_P (operands[0]))
14471 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14472 else
14473 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14474 break;
14475
14476 case MINUS:
14477 case DIV:
14478 if (MEM_P (operands[1]))
14479 {
14480 p = "r%Z1\t%1";
14481 break;
14482 }
14483
14484 if (MEM_P (operands[2]))
14485 {
14486 p = "%Z2\t%2";
14487 break;
14488 }
14489
14490 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14491 {
14492 #if SYSV386_COMPAT
14493 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14494 derived assemblers, confusingly reverse the direction of
14495 the operation for fsub{r} and fdiv{r} when the
14496 destination register is not st(0). The Intel assembler
14497 doesn't have this brain damage. Read !SYSV386_COMPAT to
14498 figure out what the hardware really does. */
14499 if (STACK_TOP_P (operands[0]))
14500 p = "{p\t%0, %2|rp\t%2, %0}";
14501 else
14502 p = "{rp\t%2, %0|p\t%0, %2}";
14503 #else
14504 if (STACK_TOP_P (operands[0]))
14505 /* As above for fmul/fadd, we can't store to st(0). */
14506 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14507 else
14508 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14509 #endif
14510 break;
14511 }
14512
14513 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14514 {
14515 #if SYSV386_COMPAT
14516 if (STACK_TOP_P (operands[0]))
14517 p = "{rp\t%0, %1|p\t%1, %0}";
14518 else
14519 p = "{p\t%1, %0|rp\t%0, %1}";
14520 #else
14521 if (STACK_TOP_P (operands[0]))
14522 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14523 else
14524 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14525 #endif
14526 break;
14527 }
14528
14529 if (STACK_TOP_P (operands[0]))
14530 {
14531 if (STACK_TOP_P (operands[1]))
14532 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14533 else
14534 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14535 break;
14536 }
14537 else if (STACK_TOP_P (operands[1]))
14538 {
14539 #if SYSV386_COMPAT
14540 p = "{\t%1, %0|r\t%0, %1}";
14541 #else
14542 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14543 #endif
14544 }
14545 else
14546 {
14547 #if SYSV386_COMPAT
14548 p = "{r\t%2, %0|\t%0, %2}";
14549 #else
14550 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14551 #endif
14552 }
14553 break;
14554
14555 default:
14556 gcc_unreachable ();
14557 }
14558
14559 strcat (buf, p);
14560 return buf;
14561 }
14562
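/* Illustrative sketch, not part of the compiler: for the AVX SFmode PLUS
   case the template above is assembled with plain string operations,

     char buf[40];
     strcpy (buf, "vadd");
     strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");

   leaving buf equal to "vaddss\t{%2, %1, %0|%0, %1, %2}"; the {att|intel}
   braces select between the two assembler dialects when the template is
   finally output.  */
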
14563 /* Return needed mode for entity in optimize_mode_switching pass. */
14564
14565 int
14566 ix86_mode_needed (int entity, rtx insn)
14567 {
14568 enum attr_i387_cw mode;
14569
14570 /* The mode UNINITIALIZED is used to store the control word after a
14571 function call or ASM pattern. The mode ANY specifies that the function
14572 has no requirements on the control word and makes no changes to the
14573 bits we are interested in. */
14574
14575 if (CALL_P (insn)
14576 || (NONJUMP_INSN_P (insn)
14577 && (asm_noperands (PATTERN (insn)) >= 0
14578 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14579 return I387_CW_UNINITIALIZED;
14580
14581 if (recog_memoized (insn) < 0)
14582 return I387_CW_ANY;
14583
14584 mode = get_attr_i387_cw (insn);
14585
14586 switch (entity)
14587 {
14588 case I387_TRUNC:
14589 if (mode == I387_CW_TRUNC)
14590 return mode;
14591 break;
14592
14593 case I387_FLOOR:
14594 if (mode == I387_CW_FLOOR)
14595 return mode;
14596 break;
14597
14598 case I387_CEIL:
14599 if (mode == I387_CW_CEIL)
14600 return mode;
14601 break;
14602
14603 case I387_MASK_PM:
14604 if (mode == I387_CW_MASK_PM)
14605 return mode;
14606 break;
14607
14608 default:
14609 gcc_unreachable ();
14610 }
14611
14612 return I387_CW_ANY;
14613 }
14614
14615 /* Output code to initialize the control word copies used by the trunc?f?i
14616 and rounding patterns. MODE selects which copy (truncation, floor,
14617 ceiling or precision-mask) is derived from the current control word. */
14618
14619 void
14620 emit_i387_cw_initialization (int mode)
14621 {
14622 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14623 rtx new_mode;
14624
14625 enum ix86_stack_slot slot;
14626
14627 rtx reg = gen_reg_rtx (HImode);
14628
14629 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14630 emit_move_insn (reg, copy_rtx (stored_mode));
14631
14632 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14633 || optimize_function_for_size_p (cfun))
14634 {
14635 switch (mode)
14636 {
14637 case I387_CW_TRUNC:
14638 /* round toward zero (truncate) */
14639 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14640 slot = SLOT_CW_TRUNC;
14641 break;
14642
14643 case I387_CW_FLOOR:
14644 /* round down toward -oo */
14645 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14646 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14647 slot = SLOT_CW_FLOOR;
14648 break;
14649
14650 case I387_CW_CEIL:
14651 /* round up toward +oo */
14652 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14653 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14654 slot = SLOT_CW_CEIL;
14655 break;
14656
14657 case I387_CW_MASK_PM:
14658 /* mask precision exception for nearbyint() */
14659 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14660 slot = SLOT_CW_MASK_PM;
14661 break;
14662
14663 default:
14664 gcc_unreachable ();
14665 }
14666 }
14667 else
14668 {
14669 switch (mode)
14670 {
14671 case I387_CW_TRUNC:
14672 /* round toward zero (truncate) */
14673 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14674 slot = SLOT_CW_TRUNC;
14675 break;
14676
14677 case I387_CW_FLOOR:
14678 /* round down toward -oo */
14679 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14680 slot = SLOT_CW_FLOOR;
14681 break;
14682
14683 case I387_CW_CEIL:
14684 /* round up toward +oo */
14685 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14686 slot = SLOT_CW_CEIL;
14687 break;
14688
14689 case I387_CW_MASK_PM:
14690 /* mask precision exception for nearbyint() */
14691 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14692 slot = SLOT_CW_MASK_PM;
14693 break;
14694
14695 default:
14696 gcc_unreachable ();
14697 }
14698 }
14699
14700 gcc_assert (slot < MAX_386_STACK_LOCALS);
14701
14702 new_mode = assign_386_stack_local (HImode, slot);
14703 emit_move_insn (new_mode, reg);
14704 }
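
/* Illustrative sketch, not part of the compiler: the control word edits
   above only touch the x87 rounding-control field (bits 10-11) and the
   precision-exception mask (bit 5).  Starting from a saved control word CW,

     unsigned short cw_trunc  = CW | 0x0c00;              rc = 11, chop
     unsigned short cw_floor  = (CW & ~0x0c00) | 0x0400;  rc = 01, toward -inf
     unsigned short cw_ceil   = (CW & ~0x0c00) | 0x0800;  rc = 10, toward +inf
     unsigned short cw_maskpm = CW | 0x0020;              mask precision exc.

   TRUNC needs no masking step because setting both rounding-control bits
   already yields the chop mode regardless of their previous value.  */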
14705
14706 /* Output code for INSN to convert a float to a signed int. OPERANDS
14707 are the insn operands. The output may be [HSD]Imode and the input
14708 operand may be [SDX]Fmode. */
14709
14710 const char *
14711 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
14712 {
14713 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14714 int dimode_p = GET_MODE (operands[0]) == DImode;
14715 int round_mode = get_attr_i387_cw (insn);
14716
14717 /* Jump through a hoop or two for DImode, since the hardware has no
14718 non-popping instruction. We used to do this a different way, but
14719 that was somewhat fragile and broke with post-reload splitters. */
14720 if ((dimode_p || fisttp) && !stack_top_dies)
14721 output_asm_insn ("fld\t%y1", operands);
14722
14723 gcc_assert (STACK_TOP_P (operands[1]));
14724 gcc_assert (MEM_P (operands[0]));
14725 gcc_assert (GET_MODE (operands[1]) != TFmode);
14726
14727 if (fisttp)
14728 output_asm_insn ("fisttp%Z0\t%0", operands);
14729 else
14730 {
14731 if (round_mode != I387_CW_ANY)
14732 output_asm_insn ("fldcw\t%3", operands);
14733 if (stack_top_dies || dimode_p)
14734 output_asm_insn ("fistp%Z0\t%0", operands);
14735 else
14736 output_asm_insn ("fist%Z0\t%0", operands);
14737 if (round_mode != I387_CW_ANY)
14738 output_asm_insn ("fldcw\t%2", operands);
14739 }
14740
14741 return "";
14742 }
14743
14744 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14745 have the values zero or one, indicates the ffreep insn's operand
14746 from the OPERANDS array. */
14747
14748 static const char *
14749 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14750 {
14751 if (TARGET_USE_FFREEP)
14752 #ifdef HAVE_AS_IX86_FFREEP
14753 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14754 #else
14755 {
14756 static char retval[32];
14757 int regno = REGNO (operands[opno]);
14758
14759 gcc_assert (FP_REGNO_P (regno));
14760
14761 regno -= FIRST_STACK_REG;
14762
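      /* ffreep %st(N) encodes as the two bytes 0xdf 0xc0+N; emitting the
	 16-bit value 0xcNdf with ASM_SHORT stores those bytes in
	 little-endian order, which is what we want when the assembler has
	 no mnemonic for ffreep.  */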
14763 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14764 return retval;
14765 }
14766 #endif
14767
14768 return opno ? "fstp\t%y1" : "fstp\t%y0";
14769 }
14770
14771
14772 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14773 should be used. UNORDERED_P is true when fucom should be used. */
14774
14775 const char *
14776 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
14777 {
14778 int stack_top_dies;
14779 rtx cmp_op0, cmp_op1;
14780 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14781
14782 if (eflags_p)
14783 {
14784 cmp_op0 = operands[0];
14785 cmp_op1 = operands[1];
14786 }
14787 else
14788 {
14789 cmp_op0 = operands[1];
14790 cmp_op1 = operands[2];
14791 }
14792
14793 if (is_sse)
14794 {
14795 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14796 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14797 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14798 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14799
14800 if (GET_MODE (operands[0]) == SFmode)
14801 if (unordered_p)
14802 return &ucomiss[TARGET_AVX ? 0 : 1];
14803 else
14804 return &comiss[TARGET_AVX ? 0 : 1];
14805 else
14806 if (unordered_p)
14807 return &ucomisd[TARGET_AVX ? 0 : 1];
14808 else
14809 return &comisd[TARGET_AVX ? 0 : 1];
14810 }
14811
14812 gcc_assert (STACK_TOP_P (cmp_op0));
14813
14814 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14815
14816 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14817 {
14818 if (stack_top_dies)
14819 {
14820 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14821 return output_387_ffreep (operands, 1);
14822 }
14823 else
14824 return "ftst\n\tfnstsw\t%0";
14825 }
14826
14827 if (STACK_REG_P (cmp_op1)
14828 && stack_top_dies
14829 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14830 && REGNO (cmp_op1) != FIRST_STACK_REG)
14831 {
14832 /* If the top of the 387 stack dies, and the other operand is
14833 also a stack register that dies, then this must be a
14834 `fcompp' float compare. */
14835
14836 if (eflags_p)
14837 {
14838 /* There is no double popping fcomi variant. Fortunately,
14839 eflags is immune from the fstp's cc clobbering. */
14840 if (unordered_p)
14841 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14842 else
14843 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14844 return output_387_ffreep (operands, 0);
14845 }
14846 else
14847 {
14848 if (unordered_p)
14849 return "fucompp\n\tfnstsw\t%0";
14850 else
14851 return "fcompp\n\tfnstsw\t%0";
14852 }
14853 }
14854 else
14855 {
14856 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14857
14858 static const char * const alt[16] =
14859 {
14860 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14861 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14862 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14863 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14864
14865 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14866 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14867 NULL,
14868 NULL,
14869
14870 "fcomi\t{%y1, %0|%0, %y1}",
14871 "fcomip\t{%y1, %0|%0, %y1}",
14872 "fucomi\t{%y1, %0|%0, %y1}",
14873 "fucomip\t{%y1, %0|%0, %y1}",
14874
14875 NULL,
14876 NULL,
14877 NULL,
14878 NULL
14879 };
14880
14881 int mask;
14882 const char *ret;
14883
14884 mask = eflags_p << 3;
14885 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14886 mask |= unordered_p << 1;
14887 mask |= stack_top_dies;
14888
14889 gcc_assert (mask < 16);
14890 ret = alt[mask];
14891 gcc_assert (ret);
14892
14893 return ret;
14894 }
14895 }
14896
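/* Illustrative sketch, not part of the compiler: the table index above is
   just the four flags packed into a nibble,

     int idx = (eflags_p << 3) | (intmode << 2) | (unordered_p << 1) | dies;

   so e.g. eflags_p = 1, intmode = 0, unordered_p = 1, dies = 1 gives
   idx = 11 and selects "fucomip\t{%y1, %0|%0, %y1}".  */
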
14897 void
14898 ix86_output_addr_vec_elt (FILE *file, int value)
14899 {
14900 const char *directive = ASM_LONG;
14901
14902 #ifdef ASM_QUAD
14903 if (TARGET_LP64)
14904 directive = ASM_QUAD;
14905 #else
14906 gcc_assert (!TARGET_64BIT);
14907 #endif
14908
14909 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14910 }
14911
14912 void
14913 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14914 {
14915 const char *directive = ASM_LONG;
14916
14917 #ifdef ASM_QUAD
14918 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14919 directive = ASM_QUAD;
14920 #else
14921 gcc_assert (!TARGET_64BIT);
14922 #endif
14923 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14924 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14925 fprintf (file, "%s%s%d-%s%d\n",
14926 directive, LPREFIX, value, LPREFIX, rel);
14927 else if (HAVE_AS_GOTOFF_IN_DATA)
14928 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14929 #if TARGET_MACHO
14930 else if (TARGET_MACHO)
14931 {
14932 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14933 machopic_output_function_base_name (file);
14934 putc ('\n', file);
14935 }
14936 #endif
14937 else
14938 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14939 GOT_SYMBOL_NAME, LPREFIX, value);
14940 }
14941 \f
14942 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
14943 for the target. */
14944
14945 void
14946 ix86_expand_clear (rtx dest)
14947 {
14948 rtx tmp;
14949
14950 /* We play register width games, which are only valid after reload. */
14951 gcc_assert (reload_completed);
14952
14953 /* Avoid HImode and its attendant prefix byte. */
14954 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14955 dest = gen_rtx_REG (SImode, REGNO (dest));
14956 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14957
14958 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14959 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14960 {
14961 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14962 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
14963 }
14964
14965 emit_insn (tmp);
14966 }
14967
14968 /* X is an unchanging MEM. If it is a constant pool reference, return
14969 the constant pool rtx, else NULL. */
14970
14971 rtx
14972 maybe_get_pool_constant (rtx x)
14973 {
14974 x = ix86_delegitimize_address (XEXP (x, 0));
14975
14976 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14977 return get_pool_constant (x);
14978
14979 return NULL_RTX;
14980 }
14981
14982 void
14983 ix86_expand_move (enum machine_mode mode, rtx operands[])
14984 {
14985 rtx op0, op1;
14986 enum tls_model model;
14987
14988 op0 = operands[0];
14989 op1 = operands[1];
14990
14991 if (GET_CODE (op1) == SYMBOL_REF)
14992 {
14993 model = SYMBOL_REF_TLS_MODEL (op1);
14994 if (model)
14995 {
14996 op1 = legitimize_tls_address (op1, model, true);
14997 op1 = force_operand (op1, op0);
14998 if (op1 == op0)
14999 return;
15000 if (GET_MODE (op1) != mode)
15001 op1 = convert_to_mode (mode, op1, 1);
15002 }
15003 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15004 && SYMBOL_REF_DLLIMPORT_P (op1))
15005 op1 = legitimize_dllimport_symbol (op1, false);
15006 }
15007 else if (GET_CODE (op1) == CONST
15008 && GET_CODE (XEXP (op1, 0)) == PLUS
15009 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15010 {
15011 rtx addend = XEXP (XEXP (op1, 0), 1);
15012 rtx symbol = XEXP (XEXP (op1, 0), 0);
15013 rtx tmp = NULL;
15014
15015 model = SYMBOL_REF_TLS_MODEL (symbol);
15016 if (model)
15017 tmp = legitimize_tls_address (symbol, model, true);
15018 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15019 && SYMBOL_REF_DLLIMPORT_P (symbol))
15020 tmp = legitimize_dllimport_symbol (symbol, true);
15021
15022 if (tmp)
15023 {
15024 tmp = force_operand (tmp, NULL);
15025 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15026 op0, 1, OPTAB_DIRECT);
15027 if (tmp == op0)
15028 return;
15029 if (GET_MODE (tmp) != mode)
15030 op1 = convert_to_mode (mode, tmp, 1);
15031 }
15032 }
15033
15034 if ((flag_pic || MACHOPIC_INDIRECT)
15035 && (mode == SImode || mode == DImode)
15036 && symbolic_operand (op1, mode))
15037 {
15038 if (TARGET_MACHO && !TARGET_64BIT)
15039 {
15040 #if TARGET_MACHO
15041 /* dynamic-no-pic */
15042 if (MACHOPIC_INDIRECT)
15043 {
15044 rtx temp = ((reload_in_progress
15045 || ((op0 && REG_P (op0))
15046 && mode == Pmode))
15047 ? op0 : gen_reg_rtx (Pmode));
15048 op1 = machopic_indirect_data_reference (op1, temp);
15049 if (MACHOPIC_PURE)
15050 op1 = machopic_legitimize_pic_address (op1, mode,
15051 temp == op1 ? 0 : temp);
15052 }
15053 if (op0 != op1 && GET_CODE (op0) != MEM)
15054 {
15055 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15056 emit_insn (insn);
15057 return;
15058 }
15059 if (GET_CODE (op0) == MEM)
15060 op1 = force_reg (Pmode, op1);
15061 else
15062 {
15063 rtx temp = op0;
15064 if (GET_CODE (temp) != REG)
15065 temp = gen_reg_rtx (Pmode);
15066 temp = legitimize_pic_address (op1, temp);
15067 if (temp == op0)
15068 return;
15069 op1 = temp;
15070 }
15071 /* dynamic-no-pic */
15072 #endif
15073 }
15074 else
15075 {
15076 if (MEM_P (op0))
15077 op1 = force_reg (mode, op1);
15078 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, mode))
15079 {
15080 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15081 op1 = legitimize_pic_address (op1, reg);
15082 if (op0 == op1)
15083 return;
15084 if (GET_MODE (op1) != mode)
15085 op1 = convert_to_mode (mode, op1, 1);
15086 }
15087 }
15088 }
15089 else
15090 {
15091 if (MEM_P (op0)
15092 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15093 || !push_operand (op0, mode))
15094 && MEM_P (op1))
15095 op1 = force_reg (mode, op1);
15096
15097 if (push_operand (op0, mode)
15098 && ! general_no_elim_operand (op1, mode))
15099 op1 = copy_to_mode_reg (mode, op1);
15100
15101 /* Force large constants in 64bit compilation into a register
15102 so that they get CSEed. */
15103 if (can_create_pseudo_p ()
15104 && (mode == DImode) && TARGET_64BIT
15105 && immediate_operand (op1, mode)
15106 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15107 && !register_operand (op0, mode)
15108 && optimize)
15109 op1 = copy_to_mode_reg (mode, op1);
15110
15111 if (can_create_pseudo_p ()
15112 && FLOAT_MODE_P (mode)
15113 && GET_CODE (op1) == CONST_DOUBLE)
15114 {
15115 /* If we are loading a floating point constant to a register,
15116 force the value to memory now, since we'll get better code
15117 out the back end. */
15118
15119 op1 = validize_mem (force_const_mem (mode, op1));
15120 if (!register_operand (op0, mode))
15121 {
15122 rtx temp = gen_reg_rtx (mode);
15123 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15124 emit_move_insn (op0, temp);
15125 return;
15126 }
15127 }
15128 }
15129
15130 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15131 }
15132
15133 void
15134 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15135 {
15136 rtx op0 = operands[0], op1 = operands[1];
15137 unsigned int align = GET_MODE_ALIGNMENT (mode);
15138
15139 /* Force constants other than zero into memory. We do not know how
15140 the instructions used to build constants modify the upper 64 bits
15141 of the register; once we have that information we may be able
15142 to handle some of them more efficiently. */
15143 if (can_create_pseudo_p ()
15144 && register_operand (op0, mode)
15145 && (CONSTANT_P (op1)
15146 || (GET_CODE (op1) == SUBREG
15147 && CONSTANT_P (SUBREG_REG (op1))))
15148 && !standard_sse_constant_p (op1))
15149 op1 = validize_mem (force_const_mem (mode, op1));
15150
15151 /* We need to check memory alignment for SSE mode since attributes
15152 can make operands unaligned. */
15153 if (can_create_pseudo_p ()
15154 && SSE_REG_MODE_P (mode)
15155 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15156 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15157 {
15158 rtx tmp[2];
15159
15160 /* ix86_expand_vector_move_misalign() does not like constants ... */
15161 if (CONSTANT_P (op1)
15162 || (GET_CODE (op1) == SUBREG
15163 && CONSTANT_P (SUBREG_REG (op1))))
15164 op1 = validize_mem (force_const_mem (mode, op1));
15165
15166 /* ... nor both arguments in memory. */
15167 if (!register_operand (op0, mode)
15168 && !register_operand (op1, mode))
15169 op1 = force_reg (mode, op1);
15170
15171 tmp[0] = op0; tmp[1] = op1;
15172 ix86_expand_vector_move_misalign (mode, tmp);
15173 return;
15174 }
15175
15176 /* Make operand1 a register if it isn't already. */
15177 if (can_create_pseudo_p ()
15178 && !register_operand (op0, mode)
15179 && !register_operand (op1, mode))
15180 {
15181 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15182 return;
15183 }
15184
15185 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15186 }
15187
15188 /* Split 32-byte AVX unaligned load and store if needed. */
15189
15190 static void
15191 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15192 {
15193 rtx m;
15194 rtx (*extract) (rtx, rtx, rtx);
15195 rtx (*move_unaligned) (rtx, rtx);
15196 enum machine_mode mode;
15197
15198 switch (GET_MODE (op0))
15199 {
15200 default:
15201 gcc_unreachable ();
15202 case V32QImode:
15203 extract = gen_avx_vextractf128v32qi;
15204 move_unaligned = gen_avx_movdqu256;
15205 mode = V16QImode;
15206 break;
15207 case V8SFmode:
15208 extract = gen_avx_vextractf128v8sf;
15209 move_unaligned = gen_avx_movups256;
15210 mode = V4SFmode;
15211 break;
15212 case V4DFmode:
15213 extract = gen_avx_vextractf128v4df;
15214 move_unaligned = gen_avx_movupd256;
15215 mode = V2DFmode;
15216 break;
15217 }
15218
15219 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15220 {
15221 rtx r = gen_reg_rtx (mode);
15222 m = adjust_address (op1, mode, 0);
15223 emit_move_insn (r, m);
15224 m = adjust_address (op1, mode, 16);
15225 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15226 emit_move_insn (op0, r);
15227 }
15228 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15229 {
15230 m = adjust_address (op0, mode, 0);
15231 emit_insn (extract (m, op1, const0_rtx));
15232 m = adjust_address (op0, mode, 16);
15233 emit_insn (extract (m, op1, const1_rtx));
15234 }
15235 else
15236 emit_insn (move_unaligned (op0, op1));
15237 }
15238
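/* Illustrative sketch, not part of the compiler: with
   TARGET_AVX256_SPLIT_UNALIGNED_LOAD the V8SF case above is equivalent to
   the following intrinsics sequence (assuming <immintrin.h>),

     __m128 lo = _mm_loadu_ps (p);                  16-byte unaligned load
     __m128 hi = _mm_loadu_ps (p + 4);              next 16 bytes
     __m256 v  = _mm256_insertf128_ps (_mm256_castps128_ps256 (lo), hi, 1);

   i.e. two 128-bit moves plus a vinsertf128 instead of a single 256-bit
   vmovups; the store path uses vextractf128 for the upper half in the
   same way.  */
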
15239 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15240 straight to ix86_expand_vector_move. */
15241 /* Code generation for scalar reg-reg moves of single and double precision data:
15242 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15243 movaps reg, reg
15244 else
15245 movss reg, reg
15246 if (x86_sse_partial_reg_dependency == true)
15247 movapd reg, reg
15248 else
15249 movsd reg, reg
15250
15251 Code generation for scalar loads of double precision data:
15252 if (x86_sse_split_regs == true)
15253 movlpd mem, reg (gas syntax)
15254 else
15255 movsd mem, reg
15256
15257 Code generation for unaligned packed loads of single precision data
15258 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15259 if (x86_sse_unaligned_move_optimal)
15260 movups mem, reg
15261
15262 if (x86_sse_partial_reg_dependency == true)
15263 {
15264 xorps reg, reg
15265 movlps mem, reg
15266 movhps mem+8, reg
15267 }
15268 else
15269 {
15270 movlps mem, reg
15271 movhps mem+8, reg
15272 }
15273
15274 Code generation for unaligned packed loads of double precision data
15275 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15276 if (x86_sse_unaligned_move_optimal)
15277 movupd mem, reg
15278
15279 if (x86_sse_split_regs == true)
15280 {
15281 movlpd mem, reg
15282 movhpd mem+8, reg
15283 }
15284 else
15285 {
15286 movsd mem, reg
15287 movhpd mem+8, reg
15288 }
15289 */
15290
15291 void
15292 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15293 {
15294 rtx op0, op1, m;
15295
15296 op0 = operands[0];
15297 op1 = operands[1];
15298
15299 if (TARGET_AVX)
15300 {
15301 switch (GET_MODE_CLASS (mode))
15302 {
15303 case MODE_VECTOR_INT:
15304 case MODE_INT:
15305 switch (GET_MODE_SIZE (mode))
15306 {
15307 case 16:
15308 /* Use movups if packed single insns are preferred on this target. */
15309 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15310 {
15311 op0 = gen_lowpart (V4SFmode, op0);
15312 op1 = gen_lowpart (V4SFmode, op1);
15313 emit_insn (gen_sse_movups (op0, op1));
15314 return;
15315 }
15316 op0 = gen_lowpart (V16QImode, op0);
15317 op1 = gen_lowpart (V16QImode, op1);
15318 emit_insn (gen_sse2_movdqu (op0, op1));
15319 break;
15320 case 32:
15321 op0 = gen_lowpart (V32QImode, op0);
15322 op1 = gen_lowpart (V32QImode, op1);
15323 ix86_avx256_split_vector_move_misalign (op0, op1);
15324 break;
15325 default:
15326 gcc_unreachable ();
15327 }
15328 break;
15329 case MODE_VECTOR_FLOAT:
15330 op0 = gen_lowpart (mode, op0);
15331 op1 = gen_lowpart (mode, op1);
15332
15333 switch (mode)
15334 {
15335 case V4SFmode:
15336 emit_insn (gen_sse_movups (op0, op1));
15337 break;
15338 case V8SFmode:
15339 ix86_avx256_split_vector_move_misalign (op0, op1);
15340 break;
15341 case V2DFmode:
15342 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15343 {
15344 op0 = gen_lowpart (V4SFmode, op0);
15345 op1 = gen_lowpart (V4SFmode, op1);
15346 emit_insn (gen_sse_movups (op0, op1));
15347 return;
15348 }
15349 emit_insn (gen_sse2_movupd (op0, op1));
15350 break;
15351 case V4DFmode:
15352 ix86_avx256_split_vector_move_misalign (op0, op1);
15353 break;
15354 default:
15355 gcc_unreachable ();
15356 }
15357 break;
15358
15359 default:
15360 gcc_unreachable ();
15361 }
15362
15363 return;
15364 }
15365
15366 if (MEM_P (op1))
15367 {
15368 /* If we're optimizing for size, movups is the smallest. */
15369 if (optimize_insn_for_size_p ()
15370 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15371 {
15372 op0 = gen_lowpart (V4SFmode, op0);
15373 op1 = gen_lowpart (V4SFmode, op1);
15374 emit_insn (gen_sse_movups (op0, op1));
15375 return;
15376 }
15377
15378 /* ??? If we have typed data, then it would appear that using
15379 movdqu is the only way to get unaligned data loaded with
15380 integer type. */
15381 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15382 {
15383 op0 = gen_lowpart (V16QImode, op0);
15384 op1 = gen_lowpart (V16QImode, op1);
15385 emit_insn (gen_sse2_movdqu (op0, op1));
15386 return;
15387 }
15388
15389 if (TARGET_SSE2 && mode == V2DFmode)
15390 {
15391 rtx zero;
15392
15393 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15394 {
15395 op0 = gen_lowpart (V2DFmode, op0);
15396 op1 = gen_lowpart (V2DFmode, op1);
15397 emit_insn (gen_sse2_movupd (op0, op1));
15398 return;
15399 }
15400
15401 /* When SSE registers are split into halves, we can avoid
15402 writing to the top half twice. */
15403 if (TARGET_SSE_SPLIT_REGS)
15404 {
15405 emit_clobber (op0);
15406 zero = op0;
15407 }
15408 else
15409 {
15410 /* ??? Not sure about the best option for the Intel chips.
15411 The following would seem to satisfy; the register is
15412 entirely cleared, breaking the dependency chain. We
15413 then store to the upper half, with a dependency depth
15414 of one. A rumor has it that Intel recommends two movsd
15415 followed by an unpacklpd, but this is unconfirmed. And
15416 given that the dependency depth of the unpacklpd would
15417 still be one, I'm not sure why this would be better. */
15418 zero = CONST0_RTX (V2DFmode);
15419 }
15420
15421 m = adjust_address (op1, DFmode, 0);
15422 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15423 m = adjust_address (op1, DFmode, 8);
15424 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15425 }
15426 else
15427 {
15428 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15429 {
15430 op0 = gen_lowpart (V4SFmode, op0);
15431 op1 = gen_lowpart (V4SFmode, op1);
15432 emit_insn (gen_sse_movups (op0, op1));
15433 return;
15434 }
15435
15436 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15437 emit_move_insn (op0, CONST0_RTX (mode));
15438 else
15439 emit_clobber (op0);
15440
15441 if (mode != V4SFmode)
15442 op0 = gen_lowpart (V4SFmode, op0);
15443 m = adjust_address (op1, V2SFmode, 0);
15444 emit_insn (gen_sse_loadlps (op0, op0, m));
15445 m = adjust_address (op1, V2SFmode, 8);
15446 emit_insn (gen_sse_loadhps (op0, op0, m));
15447 }
15448 }
15449 else if (MEM_P (op0))
15450 {
15451 /* If we're optimizing for size, movups is the smallest. */
15452 if (optimize_insn_for_size_p ()
15453 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15454 {
15455 op0 = gen_lowpart (V4SFmode, op0);
15456 op1 = gen_lowpart (V4SFmode, op1);
15457 emit_insn (gen_sse_movups (op0, op1));
15458 return;
15459 }
15460
15461 /* ??? Similar to above, only less clear because of quote
15462 typeless stores unquote. */
15463 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15464 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15465 {
15466 op0 = gen_lowpart (V16QImode, op0);
15467 op1 = gen_lowpart (V16QImode, op1);
15468 emit_insn (gen_sse2_movdqu (op0, op1));
15469 return;
15470 }
15471
15472 if (TARGET_SSE2 && mode == V2DFmode)
15473 {
15474 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15475 {
15476 op0 = gen_lowpart (V2DFmode, op0);
15477 op1 = gen_lowpart (V2DFmode, op1);
15478 emit_insn (gen_sse2_movupd (op0, op1));
15479 }
15480 else
15481 {
15482 m = adjust_address (op0, DFmode, 0);
15483 emit_insn (gen_sse2_storelpd (m, op1));
15484 m = adjust_address (op0, DFmode, 8);
15485 emit_insn (gen_sse2_storehpd (m, op1));
15486 }
15487 }
15488 else
15489 {
15490 if (mode != V4SFmode)
15491 op1 = gen_lowpart (V4SFmode, op1);
15492
15493 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15494 {
15495 op0 = gen_lowpart (V4SFmode, op0);
15496 emit_insn (gen_sse_movups (op0, op1));
15497 }
15498 else
15499 {
15500 m = adjust_address (op0, V2SFmode, 0);
15501 emit_insn (gen_sse_storelps (m, op1));
15502 m = adjust_address (op0, V2SFmode, 8);
15503 emit_insn (gen_sse_storehps (m, op1));
15504 }
15505 }
15506 }
15507 else
15508 gcc_unreachable ();
15509 }
15510
15511 /* Expand a push in MODE. This is some mode for which we do not support
15512 proper push instructions, at least from the registers that we expect
15513 the value to live in. */
15514
15515 void
15516 ix86_expand_push (enum machine_mode mode, rtx x)
15517 {
15518 rtx tmp;
15519
15520 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15521 GEN_INT (-GET_MODE_SIZE (mode)),
15522 stack_pointer_rtx, 1, OPTAB_DIRECT);
15523 if (tmp != stack_pointer_rtx)
15524 emit_move_insn (stack_pointer_rtx, tmp);
15525
15526 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15527
15528 /* When we push an operand onto the stack, it has to be aligned at least
15529 at the function argument boundary. However, since we don't have
15530 the argument type, we can't determine the actual argument
15531 boundary. */
15532 emit_move_insn (tmp, x);
15533 }
15534
15535 /* Helper function of ix86_fixup_binary_operands to canonicalize
15536 operand order. Returns true if the operands should be swapped. */
15537
15538 static bool
15539 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15540 rtx operands[])
15541 {
15542 rtx dst = operands[0];
15543 rtx src1 = operands[1];
15544 rtx src2 = operands[2];
15545
15546 /* If the operation is not commutative, we can't do anything. */
15547 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15548 return false;
15549
15550 /* Highest priority is that src1 should match dst. */
15551 if (rtx_equal_p (dst, src1))
15552 return false;
15553 if (rtx_equal_p (dst, src2))
15554 return true;
15555
15556 /* Next highest priority is that immediate constants come second. */
15557 if (immediate_operand (src2, mode))
15558 return false;
15559 if (immediate_operand (src1, mode))
15560 return true;
15561
15562 /* Lowest priority is that memory references should come second. */
15563 if (MEM_P (src2))
15564 return false;
15565 if (MEM_P (src1))
15566 return true;
15567
15568 return false;
15569 }
15570
15571
15572 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15573 destination to use for the operation. If different from the true
15574 destination in operands[0], a copy operation will be required. */
15575
15576 rtx
15577 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15578 rtx operands[])
15579 {
15580 rtx dst = operands[0];
15581 rtx src1 = operands[1];
15582 rtx src2 = operands[2];
15583
15584 /* Canonicalize operand order. */
15585 if (ix86_swap_binary_operands_p (code, mode, operands))
15586 {
15587 rtx temp;
15588
15589 /* It is invalid to swap operands of different modes. */
15590 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15591
15592 temp = src1;
15593 src1 = src2;
15594 src2 = temp;
15595 }
15596
15597 /* Both source operands cannot be in memory. */
15598 if (MEM_P (src1) && MEM_P (src2))
15599 {
15600 /* Optimization: Only read from memory once. */
15601 if (rtx_equal_p (src1, src2))
15602 {
15603 src2 = force_reg (mode, src2);
15604 src1 = src2;
15605 }
15606 else
15607 src2 = force_reg (mode, src2);
15608 }
15609
15610 /* If the destination is memory, and we do not have matching source
15611 operands, do things in registers. */
15612 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15613 dst = gen_reg_rtx (mode);
15614
15615 /* Source 1 cannot be a constant. */
15616 if (CONSTANT_P (src1))
15617 src1 = force_reg (mode, src1);
15618
15619 /* Source 1 cannot be a non-matching memory. */
15620 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15621 src1 = force_reg (mode, src1);
15622
15623 operands[1] = src1;
15624 operands[2] = src2;
15625 return dst;
15626 }
15627
15628 /* Similarly, but assume that the destination has already been
15629 set up properly. */
15630
15631 void
15632 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15633 enum machine_mode mode, rtx operands[])
15634 {
15635 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15636 gcc_assert (dst == operands[0]);
15637 }
15638
15639 /* Attempt to expand a binary operator. Make the expansion closer to the
15640 actual machine than just general_operand, which would allow 3 separate
15641 memory references (one output, two input) in a single insn. */
15642
15643 void
15644 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15645 rtx operands[])
15646 {
15647 rtx src1, src2, dst, op, clob;
15648
15649 dst = ix86_fixup_binary_operands (code, mode, operands);
15650 src1 = operands[1];
15651 src2 = operands[2];
15652
15653 /* Emit the instruction. */
15654
15655 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15656 if (reload_in_progress)
15657 {
15658 /* Reload doesn't know about the flags register, and doesn't know that
15659 it doesn't want to clobber it. We can only do this with PLUS. */
15660 gcc_assert (code == PLUS);
15661 emit_insn (op);
15662 }
15663 else if (reload_completed
15664 && code == PLUS
15665 && !rtx_equal_p (dst, src1))
15666 {
15667 /* This is going to be an LEA; avoid splitting it later. */
15668 emit_insn (op);
15669 }
15670 else
15671 {
15672 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15673 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15674 }
15675
15676 /* Fix up the destination if needed. */
15677 if (dst != operands[0])
15678 emit_move_insn (operands[0], dst);
15679 }
15680
15681 /* Return TRUE or FALSE depending on whether the binary operator meets the
15682 appropriate constraints. */
15683
15684 bool
15685 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15686 rtx operands[3])
15687 {
15688 rtx dst = operands[0];
15689 rtx src1 = operands[1];
15690 rtx src2 = operands[2];
15691
15692 /* Both source operands cannot be in memory. */
15693 if (MEM_P (src1) && MEM_P (src2))
15694 return false;
15695
15696 /* Canonicalize operand order for commutative operators. */
15697 if (ix86_swap_binary_operands_p (code, mode, operands))
15698 {
15699 rtx temp = src1;
15700 src1 = src2;
15701 src2 = temp;
15702 }
15703
15704 /* If the destination is memory, we must have a matching source operand. */
15705 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15706 return false;
15707
15708 /* Source 1 cannot be a constant. */
15709 if (CONSTANT_P (src1))
15710 return false;
15711
15712 /* Source 1 cannot be a non-matching memory. */
15713 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15714 {
15715 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15716 return (code == AND
15717 && (mode == HImode
15718 || mode == SImode
15719 || (TARGET_64BIT && mode == DImode))
15720 && CONST_INT_P (src2)
15721 && (INTVAL (src2) == 0xff
15722 || INTVAL (src2) == 0xffff));
15723 }
15724
15725 return true;
15726 }
15727
15728 /* Attempt to expand a unary operator. Make the expansion closer to the
15729 actual machine than just general_operand, which would allow 2 separate
15730 memory references (one output, one input) in a single insn. */
15731
15732 void
15733 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15734 rtx operands[])
15735 {
15736 int matching_memory;
15737 rtx src, dst, op, clob;
15738
15739 dst = operands[0];
15740 src = operands[1];
15741
15742 /* If the destination is memory, and we do not have matching source
15743 operands, do things in registers. */
15744 matching_memory = 0;
15745 if (MEM_P (dst))
15746 {
15747 if (rtx_equal_p (dst, src))
15748 matching_memory = 1;
15749 else
15750 dst = gen_reg_rtx (mode);
15751 }
15752
15753 /* When source operand is memory, destination must match. */
15754 if (MEM_P (src) && !matching_memory)
15755 src = force_reg (mode, src);
15756
15757 /* Emit the instruction. */
15758
15759 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15760 if (reload_in_progress || code == NOT)
15761 {
15762 /* Reload doesn't know about the flags register, and doesn't know that
15763 it doesn't want to clobber it. */
15764 gcc_assert (code == NOT);
15765 emit_insn (op);
15766 }
15767 else
15768 {
15769 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15770 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15771 }
15772
15773 /* Fix up the destination if needed. */
15774 if (dst != operands[0])
15775 emit_move_insn (operands[0], dst);
15776 }
15777
15778 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15779 divisor are within the range [0-255]. */
15780
15781 void
15782 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15783 bool signed_p)
15784 {
15785 rtx end_label, qimode_label;
15786 rtx insn, div, mod;
15787 rtx scratch, tmp0, tmp1, tmp2;
15788 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15789 rtx (*gen_zero_extend) (rtx, rtx);
15790 rtx (*gen_test_ccno_1) (rtx, rtx);
15791
15792 switch (mode)
15793 {
15794 case SImode:
15795 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15796 gen_test_ccno_1 = gen_testsi_ccno_1;
15797 gen_zero_extend = gen_zero_extendqisi2;
15798 break;
15799 case DImode:
15800 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15801 gen_test_ccno_1 = gen_testdi_ccno_1;
15802 gen_zero_extend = gen_zero_extendqidi2;
15803 break;
15804 default:
15805 gcc_unreachable ();
15806 }
15807
15808 end_label = gen_label_rtx ();
15809 qimode_label = gen_label_rtx ();
15810
15811 scratch = gen_reg_rtx (mode);
15812
15813 /* Use 8bit unsigned divmod if dividend and divisor are within
15814 the range [0-255]. */
15815 emit_move_insn (scratch, operands[2]);
15816 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15817 scratch, 1, OPTAB_DIRECT);
15818 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15819 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15820 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15821 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15822 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15823 pc_rtx);
15824 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15825 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15826 JUMP_LABEL (insn) = qimode_label;
15827
15828 /* Generate original signed/unsigned divmod. */
15829 div = gen_divmod4_1 (operands[0], operands[1],
15830 operands[2], operands[3]);
15831 emit_insn (div);
15832
15833 /* Branch to the end. */
15834 emit_jump_insn (gen_jump (end_label));
15835 emit_barrier ();
15836
15837 /* Generate 8bit unsigned divide. */
15838 emit_label (qimode_label);
15839 /* Don't use operands[0] for result of 8bit divide since not all
15840 registers support QImode ZERO_EXTRACT. */
15841 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15842 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15843 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15844 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15845
15846 if (signed_p)
15847 {
15848 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15849 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15850 }
15851 else
15852 {
15853 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15854 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15855 }
15856
15857 /* Extract remainder from AH. */
15858 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15859 if (REG_P (operands[1]))
15860 insn = emit_move_insn (operands[1], tmp1);
15861 else
15862 {
15863 /* Need a new scratch register since the old one has result
15864 of 8bit divide. */
15865 scratch = gen_reg_rtx (mode);
15866 emit_move_insn (scratch, tmp1);
15867 insn = emit_move_insn (operands[1], scratch);
15868 }
15869 set_unique_reg_note (insn, REG_EQUAL, mod);
15870
15871 /* Zero extend quotient from AL. */
15872 tmp1 = gen_lowpart (QImode, tmp0);
15873 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15874 set_unique_reg_note (insn, REG_EQUAL, div);
15875
15876 emit_label (end_label);
15877 }
15878
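/* Illustrative sketch, not part of the compiler: the transformation above
   corresponds to guarding the wide division with a byte-range test,

     if (((a | b) & ~0xff) == 0)
       {
         unsigned char q = (unsigned char) a / (unsigned char) b;
         unsigned char r = (unsigned char) a % (unsigned char) b;
         ...                                   AL/AH results of div r/m8
       }
     else
       q = a / b, r = a % b;                   original signed/unsigned divmod

   The range test compiles to the test instruction against -0x100 emitted
   above.  */
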
15879 #define LEA_SEARCH_THRESHOLD 12
15880
15881 /* Search backward for a non-agu definition of register number REGNO1
15882 or register number REGNO2 in INSN's basic block until
15883 1. LEA_SEARCH_THRESHOLD instructions have been scanned, or
15884 2. the BB boundary is reached, or
15885 3. an agu definition is reached.
15886 Returns the distance between the non-agu definition point and INSN.
15887 If there is no such definition point, returns -1. */
15888
15889 static int
15890 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15891 rtx insn)
15892 {
15893 basic_block bb = BLOCK_FOR_INSN (insn);
15894 int distance = 0;
15895 df_ref *def_rec;
15896 enum attr_type insn_type;
15897
15898 if (insn != BB_HEAD (bb))
15899 {
15900 rtx prev = PREV_INSN (insn);
15901 while (prev && distance < LEA_SEARCH_THRESHOLD)
15902 {
15903 if (NONDEBUG_INSN_P (prev))
15904 {
15905 distance++;
15906 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15907 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15908 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15909 && (regno1 == DF_REF_REGNO (*def_rec)
15910 || regno2 == DF_REF_REGNO (*def_rec)))
15911 {
15912 insn_type = get_attr_type (prev);
15913 if (insn_type != TYPE_LEA)
15914 goto done;
15915 }
15916 }
15917 if (prev == BB_HEAD (bb))
15918 break;
15919 prev = PREV_INSN (prev);
15920 }
15921 }
15922
15923 if (distance < LEA_SEARCH_THRESHOLD)
15924 {
15925 edge e;
15926 edge_iterator ei;
15927 bool simple_loop = false;
15928
15929 FOR_EACH_EDGE (e, ei, bb->preds)
15930 if (e->src == bb)
15931 {
15932 simple_loop = true;
15933 break;
15934 }
15935
15936 if (simple_loop)
15937 {
15938 rtx prev = BB_END (bb);
15939 while (prev
15940 && prev != insn
15941 && distance < LEA_SEARCH_THRESHOLD)
15942 {
15943 if (NONDEBUG_INSN_P (prev))
15944 {
15945 distance++;
15946 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15947 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15948 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15949 && (regno1 == DF_REF_REGNO (*def_rec)
15950 || regno2 == DF_REF_REGNO (*def_rec)))
15951 {
15952 insn_type = get_attr_type (prev);
15953 if (insn_type != TYPE_LEA)
15954 goto done;
15955 }
15956 }
15957 prev = PREV_INSN (prev);
15958 }
15959 }
15960 }
15961
15962 distance = -1;
15963
15964 done:
15965 /* get_attr_type may modify recog data. We want to make sure
15966 that recog data is valid for instruction INSN, on which
15967 distance_non_agu_define is called. INSN is unchanged here. */
15968 extract_insn_cached (insn);
15969 return distance;
15970 }
15971
15972 /* Return the distance between INSN and the next insn that uses
15973 register number REGNO0 in a memory address. Return -1 if no such
15974 use is found within LEA_SEARCH_THRESHOLD insns or if REGNO0 is set first. */
15975
15976 static int
15977 distance_agu_use (unsigned int regno0, rtx insn)
15978 {
15979 basic_block bb = BLOCK_FOR_INSN (insn);
15980 int distance = 0;
15981 df_ref *def_rec;
15982 df_ref *use_rec;
15983
15984 if (insn != BB_END (bb))
15985 {
15986 rtx next = NEXT_INSN (insn);
15987 while (next && distance < LEA_SEARCH_THRESHOLD)
15988 {
15989 if (NONDEBUG_INSN_P (next))
15990 {
15991 distance++;
15992
15993 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15994 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15995 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15996 && regno0 == DF_REF_REGNO (*use_rec))
15997 {
15998 /* Return DISTANCE if OP0 is used in memory
15999 address in NEXT. */
16000 return distance;
16001 }
16002
16003 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16004 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16005 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16006 && regno0 == DF_REF_REGNO (*def_rec))
16007 {
16008 /* Return -1 if OP0 is set in NEXT. */
16009 return -1;
16010 }
16011 }
16012 if (next == BB_END (bb))
16013 break;
16014 next = NEXT_INSN (next);
16015 }
16016 }
16017
16018 if (distance < LEA_SEARCH_THRESHOLD)
16019 {
16020 edge e;
16021 edge_iterator ei;
16022 bool simple_loop = false;
16023
16024 FOR_EACH_EDGE (e, ei, bb->succs)
16025 if (e->dest == bb)
16026 {
16027 simple_loop = true;
16028 break;
16029 }
16030
16031 if (simple_loop)
16032 {
16033 rtx next = BB_HEAD (bb);
16034 while (next
16035 && next != insn
16036 && distance < LEA_SEARCH_THRESHOLD)
16037 {
16038 if (NONDEBUG_INSN_P (next))
16039 {
16040 distance++;
16041
16042 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16043 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16044 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16045 && regno0 == DF_REF_REGNO (*use_rec))
16046 {
16047 /* Return DISTANCE if OP0 is used in memory
16048 address in NEXT. */
16049 return distance;
16050 }
16051
16052 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16053 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16054 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16055 && regno0 == DF_REF_REGNO (*def_rec))
16056 {
16057 /* Return -1 if OP0 is set in NEXT. */
16058 return -1;
16059 }
16060
16061 }
16062 next = NEXT_INSN (next);
16063 }
16064 }
16065 }
16066
16067 return -1;
16068 }
16069
16070 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16071 there is a choice between LEA and ADD.
16072 Negative value: ADD is preferred over LEA
16073 Zero: Neutral
16074 Positive value: LEA is preferred over ADD. */
16075 #define IX86_LEA_PRIORITY 2
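
/* For example, with IX86_LEA_PRIORITY == 2, a non-agu definition 3 insns
   back and an address use 6 insns ahead picks ADD (3 + 2 < 6), while an
   address use only 4 insns ahead picks LEA.  */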
16076
16077 /* Return true if it is ok to optimize an ADD operation to LEA
16078 operation to avoid flag register consumption. For most processors,
16079 ADD is faster than LEA. For processors like ATOM, if the
16080 destination register of LEA holds an actual address which will be
16081 used soon, LEA is better; otherwise ADD is better. */
16082
16083 bool
16084 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16085 {
16086 unsigned int regno0 = true_regnum (operands[0]);
16087 unsigned int regno1 = true_regnum (operands[1]);
16088 unsigned int regno2 = true_regnum (operands[2]);
16089
16090 /* If a = b + c, (a != b && a != c), we must use the lea form. */
16091 if (regno0 != regno1 && regno0 != regno2)
16092 return true;
16093
16094 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16095 return false;
16096 else
16097 {
16098 int dist_define, dist_use;
16099
16100 /* Return false if REGNO0 isn't used in memory address. */
16101 dist_use = distance_agu_use (regno0, insn);
16102 if (dist_use <= 0)
16103 return false;
16104
16105 dist_define = distance_non_agu_define (regno1, regno2, insn);
16106 if (dist_define <= 0)
16107 return true;
16108
16109 /* If this insn has both a backward non-agu dependence and a forward
16110 agu dependence, the one with the shorter distance takes effect. */
16111 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
16112 return false;
16113
16114 return true;
16115 }
16116 }
16117
16118 /* Return true if destination reg of SET_BODY is shift count of
16119 USE_BODY. */
16120
16121 static bool
16122 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16123 {
16124 rtx set_dest;
16125 rtx shift_rtx;
16126 int i;
16127
16128 /* Retrieve destination of SET_BODY. */
16129 switch (GET_CODE (set_body))
16130 {
16131 case SET:
16132 set_dest = SET_DEST (set_body);
16133 if (!set_dest || !REG_P (set_dest))
16134 return false;
16135 break;
16136 case PARALLEL:
16137 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16138 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16139 use_body))
16140 return true;
16141 default:
16142 return false;
16143 break;
16144 }
16145
16146 /* Retrieve shift count of USE_BODY. */
16147 switch (GET_CODE (use_body))
16148 {
16149 case SET:
16150 shift_rtx = XEXP (use_body, 1);
16151 break;
16152 case PARALLEL:
16153 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16154 if (ix86_dep_by_shift_count_body (set_body,
16155 XVECEXP (use_body, 0, i)))
16156 return true;
16157 default:
16158 return false;
16159 break;
16160 }
16161
16162 if (shift_rtx
16163 && (GET_CODE (shift_rtx) == ASHIFT
16164 || GET_CODE (shift_rtx) == LSHIFTRT
16165 || GET_CODE (shift_rtx) == ASHIFTRT
16166 || GET_CODE (shift_rtx) == ROTATE
16167 || GET_CODE (shift_rtx) == ROTATERT))
16168 {
16169 rtx shift_count = XEXP (shift_rtx, 1);
16170
16171 /* Return true if shift count is dest of SET_BODY. */
16172 if (REG_P (shift_count)
16173 && true_regnum (set_dest) == true_regnum (shift_count))
16174 return true;
16175 }
16176
16177 return false;
16178 }
16179
16180 /* Return true if destination reg of SET_INSN is shift count of
16181 USE_INSN. */
16182
16183 bool
16184 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16185 {
16186 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16187 PATTERN (use_insn));
16188 }
16189
16190 /* Return TRUE or FALSE depending on whether the unary operator meets the
16191 appropriate constraints. */
16192
16193 bool
16194 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16195 enum machine_mode mode ATTRIBUTE_UNUSED,
16196 rtx operands[2] ATTRIBUTE_UNUSED)
16197 {
16198 /* If one of operands is memory, source and destination must match. */
16199 if ((MEM_P (operands[0])
16200 || MEM_P (operands[1]))
16201 && ! rtx_equal_p (operands[0], operands[1]))
16202 return false;
16203 return true;
16204 }
16205
16206 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16207 are ok, keeping in mind the possible movddup alternative. */
16208
16209 bool
16210 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16211 {
16212 if (MEM_P (operands[0]))
16213 return rtx_equal_p (operands[0], operands[1 + high]);
16214 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16215 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16216 return true;
16217 }
16218
16219 /* Post-reload splitter for converting an SF or DFmode value in an
16220 SSE register into an unsigned SImode. */
16221
16222 void
16223 ix86_split_convert_uns_si_sse (rtx operands[])
16224 {
16225 enum machine_mode vecmode;
16226 rtx value, large, zero_or_two31, input, two31, x;
16227
16228 large = operands[1];
16229 zero_or_two31 = operands[2];
16230 input = operands[3];
16231 two31 = operands[4];
16232 vecmode = GET_MODE (large);
16233 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16234
16235 /* Load up the value into the low element. We must ensure that the other
16236 elements are valid floats -- zero is the easiest such value. */
16237 if (MEM_P (input))
16238 {
16239 if (vecmode == V4SFmode)
16240 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16241 else
16242 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16243 }
16244 else
16245 {
16246 input = gen_rtx_REG (vecmode, REGNO (input));
16247 emit_move_insn (value, CONST0_RTX (vecmode));
16248 if (vecmode == V4SFmode)
16249 emit_insn (gen_sse_movss (value, value, input));
16250 else
16251 emit_insn (gen_sse2_movsd (value, value, input));
16252 }
16253
16254 emit_move_insn (large, two31);
16255 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16256
16257 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16258 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16259
16260 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16261 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16262
16263 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16264 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16265
16266 large = gen_rtx_REG (V4SImode, REGNO (large));
16267 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16268
16269 x = gen_rtx_REG (V4SImode, REGNO (value));
16270 if (vecmode == V4SFmode)
16271 emit_insn (gen_sse2_cvttps2dq (x, value));
16272 else
16273 emit_insn (gen_sse2_cvttpd2dq (x, value));
16274 value = x;
16275
16276 emit_insn (gen_xorv4si3 (value, value, large));
16277 }
16278
16279 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16280 Expects the 64-bit DImode to be supplied in a pair of integral
16281 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16282 -mfpmath=sse, !optimize_size only. */
16283
16284 void
16285 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16286 {
16287 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16288 rtx int_xmm, fp_xmm;
16289 rtx biases, exponents;
16290 rtx x;
16291
16292 int_xmm = gen_reg_rtx (V4SImode);
16293 if (TARGET_INTER_UNIT_MOVES)
16294 emit_insn (gen_movdi_to_sse (int_xmm, input));
16295 else if (TARGET_SSE_SPLIT_REGS)
16296 {
16297 emit_clobber (int_xmm);
16298 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16299 }
16300 else
16301 {
16302 x = gen_reg_rtx (V2DImode);
16303 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16304 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16305 }
16306
16307 x = gen_rtx_CONST_VECTOR (V4SImode,
16308 gen_rtvec (4, GEN_INT (0x43300000UL),
16309 GEN_INT (0x45300000UL),
16310 const0_rtx, const0_rtx));
16311 exponents = validize_mem (force_const_mem (V4SImode, x));
16312
16313 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16314 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16315
16316 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16317 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16318 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16319 (0x1.0p84 + double(fp_value_hi_xmm)).
16320 Note these exponents differ by 32. */
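/* Worked example (a property of the IEEE double encoding, not of this
   code): for hi:lo = 5:3 the two lanes become the doubles 2**52 + 3 and
   2**84 + 5*2**32; subtracting the biases below and adding the halves
   yields 5*2**32 + 3, i.e. the original 64-bit value.  */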
16321
16322 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16323
16324 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16325 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16326 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16327 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16328 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16329 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16330 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16331 biases = validize_mem (force_const_mem (V2DFmode, biases));
16332 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16333
16334 /* Add the upper and lower DFmode values together. */
16335 if (TARGET_SSE3)
16336 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16337 else
16338 {
16339 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16340 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16341 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16342 }
16343
16344 ix86_expand_vector_extract (false, target, fp_xmm, 0);
16345 }
16346
16347 /* Not used, but eases macroization of patterns. */
16348 void
16349 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16350 rtx input ATTRIBUTE_UNUSED)
16351 {
16352 gcc_unreachable ();
16353 }
16354
16355 /* Convert an unsigned SImode value into a DFmode. Only currently used
16356 for SSE, but applicable anywhere. */
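/* The PLUS below subtracts 2**31 with signed wrap-around, so the signed
   value equals input - 2**31; the int->double conversion of that value is
   exact, and adding 2**31.0 back (also exact in DFmode) recovers the
   unsigned input.  */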
16357
16358 void
16359 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16360 {
16361 REAL_VALUE_TYPE TWO31r;
16362 rtx x, fp;
16363
16364 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16365 NULL, 1, OPTAB_DIRECT);
16366
16367 fp = gen_reg_rtx (DFmode);
16368 emit_insn (gen_floatsidf2 (fp, x));
16369
16370 real_ldexp (&TWO31r, &dconst1, 31);
16371 x = const_double_from_real_value (TWO31r, DFmode);
16372
16373 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16374 if (x != target)
16375 emit_move_insn (target, x);
16376 }
16377
16378 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16379 32-bit mode; otherwise we have a direct convert instruction. */
16380
16381 void
16382 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16383 {
16384 REAL_VALUE_TYPE TWO32r;
16385 rtx fp_lo, fp_hi, x;
16386
16387 fp_lo = gen_reg_rtx (DFmode);
16388 fp_hi = gen_reg_rtx (DFmode);
16389
16390 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16391
16392 real_ldexp (&TWO32r, &dconst1, 32);
16393 x = const_double_from_real_value (TWO32r, DFmode);
16394 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16395
16396 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16397
16398 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16399 0, OPTAB_DIRECT);
16400 if (x != target)
16401 emit_move_insn (target, x);
16402 }
16403
16404 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16405 For x86_32, -mfpmath=sse, !optimize_size only. */
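/* Split as value = hi * 2**16 + lo with hi, lo in [0, 0xffff]; each half
   converts to SFmode exactly, hi * 2**16 is still exact, and only the
   final addition rounds.  */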
16406 void
16407 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16408 {
16409 REAL_VALUE_TYPE ONE16r;
16410 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16411
16412 real_ldexp (&ONE16r, &dconst1, 16);
16413 x = const_double_from_real_value (ONE16r, SFmode);
16414 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16415 NULL, 0, OPTAB_DIRECT);
16416 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16417 NULL, 0, OPTAB_DIRECT);
16418 fp_hi = gen_reg_rtx (SFmode);
16419 fp_lo = gen_reg_rtx (SFmode);
16420 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16421 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16422 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16423 0, OPTAB_DIRECT);
16424 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16425 0, OPTAB_DIRECT);
16426 if (!rtx_equal_p (target, fp_hi))
16427 emit_move_insn (target, fp_hi);
16428 }
16429
16430 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16431 then replicate the value for all elements of the vector
16432 register. */
16433
16434 rtx
16435 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16436 {
16437 rtvec v;
16438 switch (mode)
16439 {
16440 case V4SImode:
16441 gcc_assert (vect);
16442 v = gen_rtvec (4, value, value, value, value);
16443 return gen_rtx_CONST_VECTOR (V4SImode, v);
16444
16445 case V2DImode:
16446 gcc_assert (vect);
16447 v = gen_rtvec (2, value, value);
16448 return gen_rtx_CONST_VECTOR (V2DImode, v);
16449
16450 case V8SFmode:
16451 if (vect)
16452 v = gen_rtvec (8, value, value, value, value,
16453 value, value, value, value);
16454 else
16455 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16456 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16457 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16458 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16459 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16460
16461 case V4SFmode:
16462 if (vect)
16463 v = gen_rtvec (4, value, value, value, value);
16464 else
16465 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16466 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16467 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16468
16469 case V4DFmode:
16470 if (vect)
16471 v = gen_rtvec (4, value, value, value, value);
16472 else
16473 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16474 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16475 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16476
16477 case V2DFmode:
16478 if (vect)
16479 v = gen_rtvec (2, value, value);
16480 else
16481 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16482 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16483
16484 default:
16485 gcc_unreachable ();
16486 }
16487 }
16488
16489 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16490 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16491 for an SSE register. If VECT is true, then replicate the mask for
16492 all elements of the vector register. If INVERT is true, then create
16493 a mask excluding the sign bit. */
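/* E.g. for V2DFmode this is the bit pattern of { -0.0, -0.0 }, or its
   complement when INVERT is true.  */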
16494
16495 rtx
16496 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16497 {
16498 enum machine_mode vec_mode, imode;
16499 HOST_WIDE_INT hi, lo;
16500 int shift = 63;
16501 rtx v;
16502 rtx mask;
16503
16504 /* Find the sign bit, sign extended to 2*HWI. */
16505 switch (mode)
16506 {
16507 case V4SImode:
16508 case V8SFmode:
16509 case V4SFmode:
16510 vec_mode = mode;
16511 mode = GET_MODE_INNER (mode);
16512 imode = SImode;
16513 lo = 0x80000000, hi = lo < 0;
16514 break;
16515
16516 case V2DImode:
16517 case V4DFmode:
16518 case V2DFmode:
16519 vec_mode = mode;
16520 mode = GET_MODE_INNER (mode);
16521 imode = DImode;
16522 if (HOST_BITS_PER_WIDE_INT >= 64)
16523 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16524 else
16525 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16526 break;
16527
16528 case TImode:
16529 case TFmode:
16530 vec_mode = VOIDmode;
16531 if (HOST_BITS_PER_WIDE_INT >= 64)
16532 {
16533 imode = TImode;
16534 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16535 }
16536 else
16537 {
16538 rtvec vec;
16539
16540 imode = DImode;
16541 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16542
16543 if (invert)
16544 {
16545 lo = ~lo, hi = ~hi;
16546 v = constm1_rtx;
16547 }
16548 else
16549 v = const0_rtx;
16550
16551 mask = immed_double_const (lo, hi, imode);
16552
16553 vec = gen_rtvec (2, v, mask);
16554 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16555 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16556
16557 return v;
16558 }
16559 break;
16560
16561 default:
16562 gcc_unreachable ();
16563 }
16564
16565 if (invert)
16566 lo = ~lo, hi = ~hi;
16567
16568 /* Force this value into the low part of a fp vector constant. */
16569 mask = immed_double_const (lo, hi, imode);
16570 mask = gen_lowpart (mode, mask);
16571
16572 if (vec_mode == VOIDmode)
16573 return force_reg (mode, mask);
16574
16575 v = ix86_build_const_vector (vec_mode, vect, mask);
16576 return force_reg (vec_mode, v);
16577 }
16578
16579 /* Generate code for floating point ABS or NEG. */
16580
16581 void
16582 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16583 rtx operands[])
16584 {
16585 rtx mask, set, dst, src;
16586 bool use_sse = false;
16587 bool vector_mode = VECTOR_MODE_P (mode);
16588 enum machine_mode vmode = mode;
16589
16590 if (vector_mode)
16591 use_sse = true;
16592 else if (mode == TFmode)
16593 use_sse = true;
16594 else if (TARGET_SSE_MATH)
16595 {
16596 use_sse = SSE_FLOAT_MODE_P (mode);
16597 if (mode == SFmode)
16598 vmode = V4SFmode;
16599 else if (mode == DFmode)
16600 vmode = V2DFmode;
16601 }
16602
16603 /* NEG and ABS performed with SSE use bitwise mask operations.
16604 Create the appropriate mask now. */
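/* The ABS mask excludes the sign bit (it is later ANDed in), while the
   NEG mask is the sign bit itself (it is later XORed in).  */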
16605 if (use_sse)
16606 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16607 else
16608 mask = NULL_RTX;
16609
16610 dst = operands[0];
16611 src = operands[1];
16612
16613 set = gen_rtx_fmt_e (code, mode, src);
16614 set = gen_rtx_SET (VOIDmode, dst, set);
16615
16616 if (mask)
16617 {
16618 rtx use, clob;
16619 rtvec par;
16620
16621 use = gen_rtx_USE (VOIDmode, mask);
16622 if (vector_mode)
16623 par = gen_rtvec (2, set, use);
16624 else
16625 {
16626 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16627 par = gen_rtvec (3, set, use, clob);
16628 }
16629 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16630 }
16631 else
16632 emit_insn (set);
16633 }
16634
16635 /* Expand a copysign operation. Special case operand 0 being a constant. */
16636
16637 void
16638 ix86_expand_copysign (rtx operands[])
16639 {
16640 enum machine_mode mode, vmode;
16641 rtx dest, op0, op1, mask, nmask;
16642
16643 dest = operands[0];
16644 op0 = operands[1];
16645 op1 = operands[2];
16646
16647 mode = GET_MODE (dest);
16648
16649 if (mode == SFmode)
16650 vmode = V4SFmode;
16651 else if (mode == DFmode)
16652 vmode = V2DFmode;
16653 else
16654 vmode = mode;
16655
16656 if (GET_CODE (op0) == CONST_DOUBLE)
16657 {
16658 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16659
16660 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16661 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16662
16663 if (mode == SFmode || mode == DFmode)
16664 {
16665 if (op0 == CONST0_RTX (mode))
16666 op0 = CONST0_RTX (vmode);
16667 else
16668 {
16669 rtx v = ix86_build_const_vector (vmode, false, op0);
16670
16671 op0 = force_reg (vmode, v);
16672 }
16673 }
16674 else if (op0 != CONST0_RTX (mode))
16675 op0 = force_reg (mode, op0);
16676
16677 mask = ix86_build_signbit_mask (vmode, 0, 0);
16678
16679 if (mode == SFmode)
16680 copysign_insn = gen_copysignsf3_const;
16681 else if (mode == DFmode)
16682 copysign_insn = gen_copysigndf3_const;
16683 else
16684 copysign_insn = gen_copysigntf3_const;
16685
16686 emit_insn (copysign_insn (dest, op0, op1, mask));
16687 }
16688 else
16689 {
16690 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16691
16692 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16693 mask = ix86_build_signbit_mask (vmode, 0, 0);
16694
16695 if (mode == SFmode)
16696 copysign_insn = gen_copysignsf3_var;
16697 else if (mode == DFmode)
16698 copysign_insn = gen_copysigndf3_var;
16699 else
16700 copysign_insn = gen_copysigntf3_var;
16701
16702 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
16703 }
16704 }
16705
16706 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16707 be a constant, and so has already been expanded into a vector constant. */
16708
16709 void
16710 ix86_split_copysign_const (rtx operands[])
16711 {
16712 enum machine_mode mode, vmode;
16713 rtx dest, op0, mask, x;
16714
16715 dest = operands[0];
16716 op0 = operands[1];
16717 mask = operands[3];
16718
16719 mode = GET_MODE (dest);
16720 vmode = GET_MODE (mask);
16721
16722 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16723 x = gen_rtx_AND (vmode, dest, mask);
16724 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16725
16726 if (op0 != CONST0_RTX (vmode))
16727 {
16728 x = gen_rtx_IOR (vmode, dest, op0);
16729 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16730 }
16731 }
16732
16733 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16734 so we have to do two masks. */
16735
16736 void
16737 ix86_split_copysign_var (rtx operands[])
16738 {
16739 enum machine_mode mode, vmode;
16740 rtx dest, scratch, op0, op1, mask, nmask, x;
16741
16742 dest = operands[0];
16743 scratch = operands[1];
16744 op0 = operands[2];
16745 op1 = operands[3];
16746 nmask = operands[4];
16747 mask = operands[5];
16748
16749 mode = GET_MODE (dest);
16750 vmode = GET_MODE (mask);
16751
16752 if (rtx_equal_p (op0, op1))
16753 {
16754 /* Shouldn't happen often (it's useless, obviously), but when it does
16755 we'd generate incorrect code if we continue below. */
16756 emit_move_insn (dest, op0);
16757 return;
16758 }
16759
16760 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16761 {
16762 gcc_assert (REGNO (op1) == REGNO (scratch));
16763
16764 x = gen_rtx_AND (vmode, scratch, mask);
16765 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16766
16767 dest = mask;
16768 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16769 x = gen_rtx_NOT (vmode, dest);
16770 x = gen_rtx_AND (vmode, x, op0);
16771 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16772 }
16773 else
16774 {
16775 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16776 {
16777 x = gen_rtx_AND (vmode, scratch, mask);
16778 }
16779 else /* alternative 2,4 */
16780 {
16781 gcc_assert (REGNO (mask) == REGNO (scratch));
16782 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16783 x = gen_rtx_AND (vmode, scratch, op1);
16784 }
16785 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16786
16787 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16788 {
16789 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16790 x = gen_rtx_AND (vmode, dest, nmask);
16791 }
16792 else /* alternative 3,4 */
16793 {
16794 gcc_assert (REGNO (nmask) == REGNO (dest));
16795 dest = nmask;
16796 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16797 x = gen_rtx_AND (vmode, dest, op0);
16798 }
16799 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16800 }
16801
16802 x = gen_rtx_IOR (vmode, dest, scratch);
16803 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16804 }
16805
16806 /* Return TRUE or FALSE depending on whether the first SET in INSN
16807 has source and destination with matching CC modes, and that the
16808 CC mode is at least as constrained as REQ_MODE. */
16809
16810 bool
16811 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16812 {
16813 rtx set;
16814 enum machine_mode set_mode;
16815
16816 set = PATTERN (insn);
16817 if (GET_CODE (set) == PARALLEL)
16818 set = XVECEXP (set, 0, 0);
16819 gcc_assert (GET_CODE (set) == SET);
16820 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16821
16822 set_mode = GET_MODE (SET_DEST (set));
16823 switch (set_mode)
16824 {
16825 case CCNOmode:
16826 if (req_mode != CCNOmode
16827 && (req_mode != CCmode
16828 || XEXP (SET_SRC (set), 1) != const0_rtx))
16829 return false;
16830 break;
16831 case CCmode:
16832 if (req_mode == CCGCmode)
16833 return false;
16834 /* FALLTHRU */
16835 case CCGCmode:
16836 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16837 return false;
16838 /* FALLTHRU */
16839 case CCGOCmode:
16840 if (req_mode == CCZmode)
16841 return false;
16842 /* FALLTHRU */
16843 case CCZmode:
16844 break;
16845
16846 case CCAmode:
16847 case CCCmode:
16848 case CCOmode:
16849 case CCSmode:
16850 if (set_mode != req_mode)
16851 return false;
16852 break;
16853
16854 default:
16855 gcc_unreachable ();
16856 }
16857
16858 return GET_MODE (SET_SRC (set)) == set_mode;
16859 }
16860
16861 /* Generate insn patterns to do an integer compare of OPERANDS. */
16862
16863 static rtx
16864 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16865 {
16866 enum machine_mode cmpmode;
16867 rtx tmp, flags;
16868
16869 cmpmode = SELECT_CC_MODE (code, op0, op1);
16870 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16871
16872 /* This is very simple, but making the interface the same as in the
16873 FP case makes the rest of the code easier. */
16874 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16875 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16876
16877 /* Return the test that should be put into the flags user, i.e.
16878 the bcc, scc, or cmov instruction. */
16879 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16880 }
16881
16882 /* Figure out whether to use ordered or unordered fp comparisons.
16883 Return the appropriate mode to use. */
16884
16885 enum machine_mode
16886 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16887 {
16888 /* ??? In order to make all comparisons reversible, we do all comparisons
16889 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16890 between all forms of trapping and nontrapping comparisons, we can make inequality
16891 comparisons trapping again, since it results in better code when using
16892 FCOM based compares. */
16893 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16894 }
16895
16896 enum machine_mode
16897 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16898 {
16899 enum machine_mode mode = GET_MODE (op0);
16900
16901 if (SCALAR_FLOAT_MODE_P (mode))
16902 {
16903 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16904 return ix86_fp_compare_mode (code);
16905 }
16906
16907 switch (code)
16908 {
16909 /* Only zero flag is needed. */
16910 case EQ: /* ZF=0 */
16911 case NE: /* ZF!=0 */
16912 return CCZmode;
16913 /* Codes needing carry flag. */
16914 case GEU: /* CF=0 */
16915 case LTU: /* CF=1 */
16916 /* Detect overflow checks. They need just the carry flag. */
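/* E.g. "x + y < x" (or "x + y >= x") after an unsigned addition tests
   exactly the carry flag.  */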
16917 if (GET_CODE (op0) == PLUS
16918 && rtx_equal_p (op1, XEXP (op0, 0)))
16919 return CCCmode;
16920 else
16921 return CCmode;
16922 case GTU: /* CF=0 & ZF=0 */
16923 case LEU: /* CF=1 | ZF=1 */
16924 /* Detect overflow checks. They need just the carry flag. */
16925 if (GET_CODE (op0) == MINUS
16926 && rtx_equal_p (op1, XEXP (op0, 0)))
16927 return CCCmode;
16928 else
16929 return CCmode;
16930 /* Codes possibly doable only with sign flag when
16931 comparing against zero. */
16932 case GE: /* SF=OF or SF=0 */
16933 case LT: /* SF<>OF or SF=1 */
16934 if (op1 == const0_rtx)
16935 return CCGOCmode;
16936 else
16937 /* For other cases the carry flag is not required. */
16938 return CCGCmode;
16939 /* Codes doable only with sign flag when comparing
16940 against zero, but we miss the jump instruction for it,
16941 so we need to use relational tests against the overflow
16942 flag, which thus needs to be zero. */
16943 case GT: /* ZF=0 & SF=OF */
16944 case LE: /* ZF=1 | SF<>OF */
16945 if (op1 == const0_rtx)
16946 return CCNOmode;
16947 else
16948 return CCGCmode;
16949 /* The strcmp pattern does (use flags) and combine may ask us for the
16950 proper mode. */
16951 case USE:
16952 return CCmode;
16953 default:
16954 gcc_unreachable ();
16955 }
16956 }
16957
16958 /* Return the fixed registers used for condition codes. */
16959
16960 static bool
16961 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16962 {
16963 *p1 = FLAGS_REG;
16964 *p2 = FPSR_REG;
16965 return true;
16966 }
16967
16968 /* If two condition code modes are compatible, return a condition code
16969 mode which is compatible with both. Otherwise, return
16970 VOIDmode. */
16971
16972 static enum machine_mode
16973 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16974 {
16975 if (m1 == m2)
16976 return m1;
16977
16978 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16979 return VOIDmode;
16980
16981 if ((m1 == CCGCmode && m2 == CCGOCmode)
16982 || (m1 == CCGOCmode && m2 == CCGCmode))
16983 return CCGCmode;
16984
16985 switch (m1)
16986 {
16987 default:
16988 gcc_unreachable ();
16989
16990 case CCmode:
16991 case CCGCmode:
16992 case CCGOCmode:
16993 case CCNOmode:
16994 case CCAmode:
16995 case CCCmode:
16996 case CCOmode:
16997 case CCSmode:
16998 case CCZmode:
16999 switch (m2)
17000 {
17001 default:
17002 return VOIDmode;
17003
17004 case CCmode:
17005 case CCGCmode:
17006 case CCGOCmode:
17007 case CCNOmode:
17008 case CCAmode:
17009 case CCCmode:
17010 case CCOmode:
17011 case CCSmode:
17012 case CCZmode:
17013 return CCmode;
17014 }
17015
17016 case CCFPmode:
17017 case CCFPUmode:
17018 /* These are only compatible with themselves, which we already
17019 checked above. */
17020 return VOIDmode;
17021 }
17022 }
17023
17024
17025 /* Return a comparison we can do that is equivalent to
17026 swap_condition (code), apart possibly from orderedness.
17027 But never change orderedness if TARGET_IEEE_FP, returning
17028 UNKNOWN in that case if necessary. */
17029
17030 static enum rtx_code
17031 ix86_fp_swap_condition (enum rtx_code code)
17032 {
17033 switch (code)
17034 {
17035 case GT: /* GTU - CF=0 & ZF=0 */
17036 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17037 case GE: /* GEU - CF=0 */
17038 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17039 case UNLT: /* LTU - CF=1 */
17040 return TARGET_IEEE_FP ? UNKNOWN : GT;
17041 case UNLE: /* LEU - CF=1 | ZF=1 */
17042 return TARGET_IEEE_FP ? UNKNOWN : GE;
17043 default:
17044 return swap_condition (code);
17045 }
17046 }
17047
17048 /* Return cost of comparison CODE using the best strategy for performance.
17049 All following functions use the number of instructions as the cost metric.
17050 In the future this should be tweaked to compute bytes for optimize_size and
17051 take into account performance of various instructions on various CPUs. */
17052
17053 static int
17054 ix86_fp_comparison_cost (enum rtx_code code)
17055 {
17056 int arith_cost;
17057
17058 /* The cost of code using bit-twiddling on %ah. */
17059 switch (code)
17060 {
17061 case UNLE:
17062 case UNLT:
17063 case LTGT:
17064 case GT:
17065 case GE:
17066 case UNORDERED:
17067 case ORDERED:
17068 case UNEQ:
17069 arith_cost = 4;
17070 break;
17071 case LT:
17072 case NE:
17073 case EQ:
17074 case UNGE:
17075 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17076 break;
17077 case LE:
17078 case UNGT:
17079 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17080 break;
17081 default:
17082 gcc_unreachable ();
17083 }
17084
17085 switch (ix86_fp_comparison_strategy (code))
17086 {
17087 case IX86_FPCMP_COMI:
17088 return arith_cost > 4 ? 3 : 2;
17089 case IX86_FPCMP_SAHF:
17090 return arith_cost > 4 ? 4 : 3;
17091 default:
17092 return arith_cost;
17093 }
17094 }
17095
17096 /* Return strategy to use for floating-point. We assume that fcomi is always
17097 preferable where available, since that is also true when looking at size
17098 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17099
17100 enum ix86_fpcmp_strategy
17101 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17102 {
17103 /* Do fcomi/sahf based test when profitable. */
17104
17105 if (TARGET_CMOVE)
17106 return IX86_FPCMP_COMI;
17107
17108 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17109 return IX86_FPCMP_SAHF;
17110
17111 return IX86_FPCMP_ARITH;
17112 }
17113
17114 /* Swap, force into registers, or otherwise massage the two operands
17115 to a fp comparison. The operands are updated in place; the new
17116 comparison code is returned. */
17117
17118 static enum rtx_code
17119 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17120 {
17121 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17122 rtx op0 = *pop0, op1 = *pop1;
17123 enum machine_mode op_mode = GET_MODE (op0);
17124 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17125
17126 /* All of the unordered compare instructions only work on registers.
17127 The same is true of the fcomi compare instructions. The XFmode
17128 compare instructions require registers except when comparing
17129 against zero or when converting operand 1 from fixed point to
17130 floating point. */
17131
17132 if (!is_sse
17133 && (fpcmp_mode == CCFPUmode
17134 || (op_mode == XFmode
17135 && ! (standard_80387_constant_p (op0) == 1
17136 || standard_80387_constant_p (op1) == 1)
17137 && GET_CODE (op1) != FLOAT)
17138 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17139 {
17140 op0 = force_reg (op_mode, op0);
17141 op1 = force_reg (op_mode, op1);
17142 }
17143 else
17144 {
17145 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17146 things around if they appear profitable, otherwise force op0
17147 into a register. */
17148
17149 if (standard_80387_constant_p (op0) == 0
17150 || (MEM_P (op0)
17151 && ! (standard_80387_constant_p (op1) == 0
17152 || MEM_P (op1))))
17153 {
17154 enum rtx_code new_code = ix86_fp_swap_condition (code);
17155 if (new_code != UNKNOWN)
17156 {
17157 rtx tmp;
17158 tmp = op0, op0 = op1, op1 = tmp;
17159 code = new_code;
17160 }
17161 }
17162
17163 if (!REG_P (op0))
17164 op0 = force_reg (op_mode, op0);
17165
17166 if (CONSTANT_P (op1))
17167 {
17168 int tmp = standard_80387_constant_p (op1);
17169 if (tmp == 0)
17170 op1 = validize_mem (force_const_mem (op_mode, op1));
17171 else if (tmp == 1)
17172 {
17173 if (TARGET_CMOVE)
17174 op1 = force_reg (op_mode, op1);
17175 }
17176 else
17177 op1 = force_reg (op_mode, op1);
17178 }
17179 }
17180
17181 /* Try to rearrange the comparison to make it cheaper. */
17182 if (ix86_fp_comparison_cost (code)
17183 > ix86_fp_comparison_cost (swap_condition (code))
17184 && (REG_P (op1) || can_create_pseudo_p ()))
17185 {
17186 rtx tmp;
17187 tmp = op0, op0 = op1, op1 = tmp;
17188 code = swap_condition (code);
17189 if (!REG_P (op0))
17190 op0 = force_reg (op_mode, op0);
17191 }
17192
17193 *pop0 = op0;
17194 *pop1 = op1;
17195 return code;
17196 }
17197
17198 /* Convert the comparison codes we use to represent FP comparisons to the
17199 integer code that will result in a proper branch. Return UNKNOWN if no such code
17200 is available. */
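/* Both the fcomi and the fnstsw/sahf paths leave CF and ZF set the way
   an unsigned integer compare would (with PF flagging an unordered
   result), hence GT -> GTU, GE -> GEU, UNLT -> LTU and so on below.  */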
17201
17202 enum rtx_code
17203 ix86_fp_compare_code_to_integer (enum rtx_code code)
17204 {
17205 switch (code)
17206 {
17207 case GT:
17208 return GTU;
17209 case GE:
17210 return GEU;
17211 case ORDERED:
17212 case UNORDERED:
17213 return code;
17214 break;
17215 case UNEQ:
17216 return EQ;
17217 break;
17218 case UNLT:
17219 return LTU;
17220 break;
17221 case UNLE:
17222 return LEU;
17223 break;
17224 case LTGT:
17225 return NE;
17226 break;
17227 default:
17228 return UNKNOWN;
17229 }
17230 }
17231
17232 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17233
17234 static rtx
17235 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17236 {
17237 enum machine_mode fpcmp_mode, intcmp_mode;
17238 rtx tmp, tmp2;
17239
17240 fpcmp_mode = ix86_fp_compare_mode (code);
17241 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17242
17243 /* Do fcomi/sahf based test when profitable. */
17244 switch (ix86_fp_comparison_strategy (code))
17245 {
17246 case IX86_FPCMP_COMI:
17247 intcmp_mode = fpcmp_mode;
17248 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17249 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17250 tmp);
17251 emit_insn (tmp);
17252 break;
17253
17254 case IX86_FPCMP_SAHF:
17255 intcmp_mode = fpcmp_mode;
17256 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17257 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17258 tmp);
17259
17260 if (!scratch)
17261 scratch = gen_reg_rtx (HImode);
17262 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17263 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17264 break;
17265
17266 case IX86_FPCMP_ARITH:
17267 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17268 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17269 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17270 if (!scratch)
17271 scratch = gen_reg_rtx (HImode);
17272 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17273
17274 /* In the unordered case, we have to check C2 for NaN's, which
17275 doesn't happen to work out to anything nice combination-wise.
17276 So do some bit twiddling on the value we've got in AH to come
17277 up with an appropriate set of condition codes. */
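/* After fnstsw the status-word bits land in AH as C0 = 0x01, C2 = 0x04
   and C3 = 0x40; fcom sets none of them for ">", C0 for "<", C3 for
   "==" and all three when unordered, which is where the 0x01, 0x04,
   0x05, 0x40, 0x44 and 0x45 constants below come from.  */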
17278
17279 intcmp_mode = CCNOmode;
17280 switch (code)
17281 {
17282 case GT:
17283 case UNGT:
17284 if (code == GT || !TARGET_IEEE_FP)
17285 {
17286 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17287 code = EQ;
17288 }
17289 else
17290 {
17291 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17292 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17293 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17294 intcmp_mode = CCmode;
17295 code = GEU;
17296 }
17297 break;
17298 case LT:
17299 case UNLT:
17300 if (code == LT && TARGET_IEEE_FP)
17301 {
17302 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17303 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17304 intcmp_mode = CCmode;
17305 code = EQ;
17306 }
17307 else
17308 {
17309 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17310 code = NE;
17311 }
17312 break;
17313 case GE:
17314 case UNGE:
17315 if (code == GE || !TARGET_IEEE_FP)
17316 {
17317 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17318 code = EQ;
17319 }
17320 else
17321 {
17322 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17323 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17324 code = NE;
17325 }
17326 break;
17327 case LE:
17328 case UNLE:
17329 if (code == LE && TARGET_IEEE_FP)
17330 {
17331 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17332 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17333 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17334 intcmp_mode = CCmode;
17335 code = LTU;
17336 }
17337 else
17338 {
17339 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17340 code = NE;
17341 }
17342 break;
17343 case EQ:
17344 case UNEQ:
17345 if (code == EQ && TARGET_IEEE_FP)
17346 {
17347 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17348 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17349 intcmp_mode = CCmode;
17350 code = EQ;
17351 }
17352 else
17353 {
17354 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17355 code = NE;
17356 }
17357 break;
17358 case NE:
17359 case LTGT:
17360 if (code == NE && TARGET_IEEE_FP)
17361 {
17362 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17363 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17364 GEN_INT (0x40)));
17365 code = NE;
17366 }
17367 else
17368 {
17369 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17370 code = EQ;
17371 }
17372 break;
17373
17374 case UNORDERED:
17375 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17376 code = NE;
17377 break;
17378 case ORDERED:
17379 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17380 code = EQ;
17381 break;
17382
17383 default:
17384 gcc_unreachable ();
17385 }
17386 break;
17387
17388 default:
17389 gcc_unreachable();
17390 }
17391
17392 /* Return the test that should be put into the flags user, i.e.
17393 the bcc, scc, or cmov instruction. */
17394 return gen_rtx_fmt_ee (code, VOIDmode,
17395 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17396 const0_rtx);
17397 }
17398
17399 static rtx
17400 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17401 {
17402 rtx ret;
17403
17404 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17405 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17406
17407 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17408 {
17409 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17410 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17411 }
17412 else
17413 ret = ix86_expand_int_compare (code, op0, op1);
17414
17415 return ret;
17416 }
17417
17418 void
17419 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17420 {
17421 enum machine_mode mode = GET_MODE (op0);
17422 rtx tmp;
17423
17424 switch (mode)
17425 {
17426 case SFmode:
17427 case DFmode:
17428 case XFmode:
17429 case QImode:
17430 case HImode:
17431 case SImode:
17432 simple:
17433 tmp = ix86_expand_compare (code, op0, op1);
17434 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17435 gen_rtx_LABEL_REF (VOIDmode, label),
17436 pc_rtx);
17437 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17438 return;
17439
17440 case DImode:
17441 if (TARGET_64BIT)
17442 goto simple;
17443 case TImode:
17444 /* Expand DImode branch into multiple compare+branch. */
17445 {
17446 rtx lo[2], hi[2], label2;
17447 enum rtx_code code1, code2, code3;
17448 enum machine_mode submode;
17449
17450 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17451 {
17452 tmp = op0, op0 = op1, op1 = tmp;
17453 code = swap_condition (code);
17454 }
17455
17456 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17457 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17458
17459 submode = mode == DImode ? SImode : DImode;
17460
17461 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17462 avoid two branches. This costs one extra insn, so disable when
17463 optimizing for size. */
17464
17465 if ((code == EQ || code == NE)
17466 && (!optimize_insn_for_size_p ()
17467 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17468 {
17469 rtx xor0, xor1;
17470
17471 xor1 = hi[0];
17472 if (hi[1] != const0_rtx)
17473 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17474 NULL_RTX, 0, OPTAB_WIDEN);
17475
17476 xor0 = lo[0];
17477 if (lo[1] != const0_rtx)
17478 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17479 NULL_RTX, 0, OPTAB_WIDEN);
17480
17481 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17482 NULL_RTX, 0, OPTAB_WIDEN);
17483
17484 ix86_expand_branch (code, tmp, const0_rtx, label);
17485 return;
17486 }
17487
17488 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17489 op1 is a constant and the low word is zero, then we can just
17490 examine the high word. Similarly for low word -1 and
17491 less-or-equal-than or greater-than. */
17492
17493 if (CONST_INT_P (hi[1]))
17494 switch (code)
17495 {
17496 case LT: case LTU: case GE: case GEU:
17497 if (lo[1] == const0_rtx)
17498 {
17499 ix86_expand_branch (code, hi[0], hi[1], label);
17500 return;
17501 }
17502 break;
17503 case LE: case LEU: case GT: case GTU:
17504 if (lo[1] == constm1_rtx)
17505 {
17506 ix86_expand_branch (code, hi[0], hi[1], label);
17507 return;
17508 }
17509 break;
17510 default:
17511 break;
17512 }
17513
17514 /* Otherwise, we need two or three jumps. */
17515
17516 label2 = gen_label_rtx ();
17517
17518 code1 = code;
17519 code2 = swap_condition (code);
17520 code3 = unsigned_condition (code);
17521
17522 switch (code)
17523 {
17524 case LT: case GT: case LTU: case GTU:
17525 break;
17526
17527 case LE: code1 = LT; code2 = GT; break;
17528 case GE: code1 = GT; code2 = LT; break;
17529 case LEU: code1 = LTU; code2 = GTU; break;
17530 case GEU: code1 = GTU; code2 = LTU; break;
17531
17532 case EQ: code1 = UNKNOWN; code2 = NE; break;
17533 case NE: code2 = UNKNOWN; break;
17534
17535 default:
17536 gcc_unreachable ();
17537 }
17538
17539 /*
17540 * a < b =>
17541 * if (hi(a) < hi(b)) goto true;
17542 * if (hi(a) > hi(b)) goto false;
17543 * if (lo(a) < lo(b)) goto true;
17544 * false:
17545 */
17546
17547 if (code1 != UNKNOWN)
17548 ix86_expand_branch (code1, hi[0], hi[1], label);
17549 if (code2 != UNKNOWN)
17550 ix86_expand_branch (code2, hi[0], hi[1], label2);
17551
17552 ix86_expand_branch (code3, lo[0], lo[1], label);
17553
17554 if (code2 != UNKNOWN)
17555 emit_label (label2);
17556 return;
17557 }
17558
17559 default:
17560 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17561 goto simple;
17562 }
17563 }
17564
17565 /* Split branch based on floating point condition. */
17566 void
17567 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17568 rtx target1, rtx target2, rtx tmp, rtx pushed)
17569 {
17570 rtx condition;
17571 rtx i;
17572
17573 if (target2 != pc_rtx)
17574 {
17575 rtx tmp = target2;
17576 code = reverse_condition_maybe_unordered (code);
17577 target2 = target1;
17578 target1 = tmp;
17579 }
17580
17581 condition = ix86_expand_fp_compare (code, op1, op2,
17582 tmp);
17583
17584 /* Remove pushed operand from stack. */
17585 if (pushed)
17586 ix86_free_from_memory (GET_MODE (pushed));
17587
17588 i = emit_jump_insn (gen_rtx_SET
17589 (VOIDmode, pc_rtx,
17590 gen_rtx_IF_THEN_ELSE (VOIDmode,
17591 condition, target1, target2)));
17592 if (split_branch_probability >= 0)
17593 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17594 }
17595
17596 void
17597 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17598 {
17599 rtx ret;
17600
17601 gcc_assert (GET_MODE (dest) == QImode);
17602
17603 ret = ix86_expand_compare (code, op0, op1);
17604 PUT_MODE (ret, QImode);
17605 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17606 }
17607
17608 /* Expand a comparison setting or clearing the carry flag. Return true
17609 when successful, and set *POP to the resulting comparison. */
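/* Only LTU/GEU comparisons are produced, since those are the forms that
   sbb/adc-based sequences can consume directly from the carry flag.  */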
17610 static bool
17611 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17612 {
17613 enum machine_mode mode =
17614 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17615
17616 /* Do not handle double-mode compares that go through special path. */
17617 if (mode == (TARGET_64BIT ? TImode : DImode))
17618 return false;
17619
17620 if (SCALAR_FLOAT_MODE_P (mode))
17621 {
17622 rtx compare_op, compare_seq;
17623
17624 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17625
17626 /* Shortcut: the following common codes never translate
17627 into carry flag compares. */
17628 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17629 || code == ORDERED || code == UNORDERED)
17630 return false;
17631
17632 /* These comparisons require the zero flag; swap the operands so they won't. */
17633 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17634 && !TARGET_IEEE_FP)
17635 {
17636 rtx tmp = op0;
17637 op0 = op1;
17638 op1 = tmp;
17639 code = swap_condition (code);
17640 }
17641
17642 /* Try to expand the comparison and verify that we end up with
17643 carry flag based comparison. This fails to be true only when
17644 we decide to expand the comparison using arithmetic, which is not a
17645 very common scenario. */
17646 start_sequence ();
17647 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17648 compare_seq = get_insns ();
17649 end_sequence ();
17650
17651 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17652 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17653 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17654 else
17655 code = GET_CODE (compare_op);
17656
17657 if (code != LTU && code != GEU)
17658 return false;
17659
17660 emit_insn (compare_seq);
17661 *pop = compare_op;
17662 return true;
17663 }
17664
17665 if (!INTEGRAL_MODE_P (mode))
17666 return false;
17667
17668 switch (code)
17669 {
17670 case LTU:
17671 case GEU:
17672 break;
17673
17674 /* Convert a==0 into (unsigned)a<1. */
17675 case EQ:
17676 case NE:
17677 if (op1 != const0_rtx)
17678 return false;
17679 op1 = const1_rtx;
17680 code = (code == EQ ? LTU : GEU);
17681 break;
17682
17683 /* Convert a>b into b<a or a>=b+1. */
17684 case GTU:
17685 case LEU:
17686 if (CONST_INT_P (op1))
17687 {
17688 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17689 /* Bail out on overflow. We could still swap the operands, but that
17690 would force loading of the constant into a register. */
17691 if (op1 == const0_rtx
17692 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17693 return false;
17694 code = (code == GTU ? GEU : LTU);
17695 }
17696 else
17697 {
17698 rtx tmp = op1;
17699 op1 = op0;
17700 op0 = tmp;
17701 code = (code == GTU ? LTU : GEU);
17702 }
17703 break;
17704
17705 /* Convert a>=0 into (unsigned)a<0x80000000. */
17706 case LT:
17707 case GE:
17708 if (mode == DImode || op1 != const0_rtx)
17709 return false;
17710 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17711 code = (code == LT ? GEU : LTU);
17712 break;
17713 case LE:
17714 case GT:
17715 if (mode == DImode || op1 != constm1_rtx)
17716 return false;
17717 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17718 code = (code == LE ? GEU : LTU);
17719 break;
17720
17721 default:
17722 return false;
17723 }
17724 /* Swapping operands may cause a constant to appear as the first operand. */
17725 if (!nonimmediate_operand (op0, VOIDmode))
17726 {
17727 if (!can_create_pseudo_p ())
17728 return false;
17729 op0 = force_reg (mode, op0);
17730 }
17731 *pop = ix86_expand_compare (code, op0, op1);
17732 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
17733 return true;
17734 }
17735
17736 bool
17737 ix86_expand_int_movcc (rtx operands[])
17738 {
17739 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17740 rtx compare_seq, compare_op;
17741 enum machine_mode mode = GET_MODE (operands[0]);
17742 bool sign_bit_compare_p = false;
17743 rtx op0 = XEXP (operands[1], 0);
17744 rtx op1 = XEXP (operands[1], 1);
17745
17746 start_sequence ();
17747 compare_op = ix86_expand_compare (code, op0, op1);
17748 compare_seq = get_insns ();
17749 end_sequence ();
17750
17751 compare_code = GET_CODE (compare_op);
17752
17753 if ((op1 == const0_rtx && (code == GE || code == LT))
17754 || (op1 == constm1_rtx && (code == GT || code == LE)))
17755 sign_bit_compare_p = true;
17756
17757 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17758 HImode insns, we'd be swallowed in word prefix ops. */
17759
17760 if ((mode != HImode || TARGET_FAST_PREFIX)
17761 && (mode != (TARGET_64BIT ? TImode : DImode))
17762 && CONST_INT_P (operands[2])
17763 && CONST_INT_P (operands[3]))
17764 {
17765 rtx out = operands[0];
17766 HOST_WIDE_INT ct = INTVAL (operands[2]);
17767 HOST_WIDE_INT cf = INTVAL (operands[3]);
17768 HOST_WIDE_INT diff;
17769
17770 diff = ct - cf;
17771 /* Sign bit compares are better done using shifts than by using
17772 sbb. */
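/* The idea below: materialize the comparison as an all-zeros/all-ones
   mask (sbb reg,reg yields 0 or -1 from the carry flag, and
   emit_store_flag with -1 does the same for sign-bit compares), then
   turn that mask into ct/cf with the PLUS/IOR/NOT/AND sequences that
   follow.  */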
17773 if (sign_bit_compare_p
17774 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17775 {
17776 /* Detect overlap between destination and compare sources. */
17777 rtx tmp = out;
17778
17779 if (!sign_bit_compare_p)
17780 {
17781 rtx flags;
17782 bool fpcmp = false;
17783
17784 compare_code = GET_CODE (compare_op);
17785
17786 flags = XEXP (compare_op, 0);
17787
17788 if (GET_MODE (flags) == CCFPmode
17789 || GET_MODE (flags) == CCFPUmode)
17790 {
17791 fpcmp = true;
17792 compare_code
17793 = ix86_fp_compare_code_to_integer (compare_code);
17794 }
17795
17796 /* To simplify rest of code, restrict to the GEU case. */
17797 if (compare_code == LTU)
17798 {
17799 HOST_WIDE_INT tmp = ct;
17800 ct = cf;
17801 cf = tmp;
17802 compare_code = reverse_condition (compare_code);
17803 code = reverse_condition (code);
17804 }
17805 else
17806 {
17807 if (fpcmp)
17808 PUT_CODE (compare_op,
17809 reverse_condition_maybe_unordered
17810 (GET_CODE (compare_op)));
17811 else
17812 PUT_CODE (compare_op,
17813 reverse_condition (GET_CODE (compare_op)));
17814 }
17815 diff = ct - cf;
17816
17817 if (reg_overlap_mentioned_p (out, op0)
17818 || reg_overlap_mentioned_p (out, op1))
17819 tmp = gen_reg_rtx (mode);
17820
17821 if (mode == DImode)
17822 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17823 else
17824 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17825 flags, compare_op));
17826 }
17827 else
17828 {
17829 if (code == GT || code == GE)
17830 code = reverse_condition (code);
17831 else
17832 {
17833 HOST_WIDE_INT tmp = ct;
17834 ct = cf;
17835 cf = tmp;
17836 diff = ct - cf;
17837 }
17838 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17839 }
17840
17841 if (diff == 1)
17842 {
17843 /*
17844 * cmpl op0,op1
17845 * sbbl dest,dest
17846 * [addl dest, ct]
17847 *
17848 * Size 5 - 8.
17849 */
17850 if (ct)
17851 tmp = expand_simple_binop (mode, PLUS,
17852 tmp, GEN_INT (ct),
17853 copy_rtx (tmp), 1, OPTAB_DIRECT);
17854 }
17855 else if (cf == -1)
17856 {
17857 /*
17858 * cmpl op0,op1
17859 * sbbl dest,dest
17860 * orl $ct, dest
17861 *
17862 * Size 8.
17863 */
17864 tmp = expand_simple_binop (mode, IOR,
17865 tmp, GEN_INT (ct),
17866 copy_rtx (tmp), 1, OPTAB_DIRECT);
17867 }
17868 else if (diff == -1 && ct)
17869 {
17870 /*
17871 * cmpl op0,op1
17872 * sbbl dest,dest
17873 * notl dest
17874 * [addl dest, cf]
17875 *
17876 * Size 8 - 11.
17877 */
17878 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17879 if (cf)
17880 tmp = expand_simple_binop (mode, PLUS,
17881 copy_rtx (tmp), GEN_INT (cf),
17882 copy_rtx (tmp), 1, OPTAB_DIRECT);
17883 }
17884 else
17885 {
17886 /*
17887 * cmpl op0,op1
17888 * sbbl dest,dest
17889 * [notl dest]
17890 * andl cf - ct, dest
17891 * [addl dest, ct]
17892 *
17893 * Size 8 - 11.
17894 */
17895
17896 if (cf == 0)
17897 {
17898 cf = ct;
17899 ct = 0;
17900 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17901 }
17902
17903 tmp = expand_simple_binop (mode, AND,
17904 copy_rtx (tmp),
17905 gen_int_mode (cf - ct, mode),
17906 copy_rtx (tmp), 1, OPTAB_DIRECT);
17907 if (ct)
17908 tmp = expand_simple_binop (mode, PLUS,
17909 copy_rtx (tmp), GEN_INT (ct),
17910 copy_rtx (tmp), 1, OPTAB_DIRECT);
17911 }
17912
17913 if (!rtx_equal_p (tmp, out))
17914 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
17915
17916 return true;
17917 }
17918
17919 if (diff < 0)
17920 {
17921 enum machine_mode cmp_mode = GET_MODE (op0);
17922
17923 HOST_WIDE_INT tmp;
17924 tmp = ct, ct = cf, cf = tmp;
17925 diff = -diff;
17926
17927 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17928 {
17929 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17930
17931 /* We may be reversing an unordered compare to a normal compare, which
17932 is not valid in general (we may convert a non-trapping condition
17933 to a trapping one); however, on i386 we currently emit all
17934 comparisons unordered. */
17935 compare_code = reverse_condition_maybe_unordered (compare_code);
17936 code = reverse_condition_maybe_unordered (code);
17937 }
17938 else
17939 {
17940 compare_code = reverse_condition (compare_code);
17941 code = reverse_condition (code);
17942 }
17943 }
17944
17945 compare_code = UNKNOWN;
17946 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17947 && CONST_INT_P (op1))
17948 {
17949 if (op1 == const0_rtx
17950 && (code == LT || code == GE))
17951 compare_code = code;
17952 else if (op1 == constm1_rtx)
17953 {
17954 if (code == LE)
17955 compare_code = LT;
17956 else if (code == GT)
17957 compare_code = GE;
17958 }
17959 }
17960
17961 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17962 if (compare_code != UNKNOWN
17963 && GET_MODE (op0) == GET_MODE (out)
17964 && (cf == -1 || ct == -1))
17965 {
17966 /* If lea code below could be used, only optimize
17967 if it results in a 2 insn sequence. */
17968
17969 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17970 || diff == 3 || diff == 5 || diff == 9)
17971 || (compare_code == LT && ct == -1)
17972 || (compare_code == GE && cf == -1))
17973 {
17974 /*
17975 * notl op1 (if necessary)
17976 * sarl $31, op1
17977 * orl cf, op1
17978 */
17979 if (ct != -1)
17980 {
17981 cf = ct;
17982 ct = -1;
17983 code = reverse_condition (code);
17984 }
17985
17986 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17987
17988 out = expand_simple_binop (mode, IOR,
17989 out, GEN_INT (cf),
17990 out, 1, OPTAB_DIRECT);
17991 if (out != operands[0])
17992 emit_move_insn (operands[0], out);
17993
17994 return true;
17995 }
17996 }
17997
17998
17999 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18000 || diff == 3 || diff == 5 || diff == 9)
18001 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18002 && (mode != DImode
18003 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18004 {
18005 /*
18006 * xorl dest,dest
18007 * cmpl op1,op2
18008 * setcc dest
18009 * lea cf(dest*(ct-cf)),dest
18010 *
18011 * Size 14.
18012 *
18013 * This also catches the degenerate setcc-only case.
18014 */
18015
18016 rtx tmp;
18017 int nops;
18018
18019 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18020
18021 nops = 0;
18022 /* On x86_64 the lea instruction operates on Pmode, so we need
18023 to get the arithmetic done in the proper mode to match. */
18024 if (diff == 1)
18025 tmp = copy_rtx (out);
18026 else
18027 {
18028 rtx out1;
18029 out1 = copy_rtx (out);
18030 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18031 nops++;
18032 if (diff & 1)
18033 {
18034 tmp = gen_rtx_PLUS (mode, tmp, out1);
18035 nops++;
18036 }
18037 }
18038 if (cf != 0)
18039 {
18040 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18041 nops++;
18042 }
18043 if (!rtx_equal_p (tmp, out))
18044 {
18045 if (nops == 1)
18046 out = force_operand (tmp, copy_rtx (out));
18047 else
18048 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18049 }
18050 if (!rtx_equal_p (out, operands[0]))
18051 emit_move_insn (operands[0], copy_rtx (out));
18052
18053 return true;
18054 }
18055
18056 /*
18057 * General case: Jumpful:
18058 * xorl dest,dest cmpl op1, op2
18059 * cmpl op1, op2 movl ct, dest
18060 * setcc dest jcc 1f
18061 * decl dest movl cf, dest
18062 * andl (cf-ct),dest 1:
18063 * addl ct,dest
18064 *
18065 * Size 20. Size 14.
18066 *
18067 * This is reasonably steep, but branch mispredict costs are
18068 * high on modern cpus, so consider failing only if optimizing
18069 * for space.
18070 */
18071
18072 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18073 && BRANCH_COST (optimize_insn_for_speed_p (),
18074 false) >= 2)
18075 {
18076 if (cf == 0)
18077 {
18078 enum machine_mode cmp_mode = GET_MODE (op0);
18079
18080 cf = ct;
18081 ct = 0;
18082
18083 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18084 {
18085 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18086
18087 /* We may be reversing an unordered compare to a normal compare,
18088 which is not valid in general (we may convert a non-trapping
18089 condition to a trapping one); however, on i386 we currently
18090 emit all comparisons unordered. */
18091 code = reverse_condition_maybe_unordered (code);
18092 }
18093 else
18094 {
18095 code = reverse_condition (code);
18096 if (compare_code != UNKNOWN)
18097 compare_code = reverse_condition (compare_code);
18098 }
18099 }
18100
18101 if (compare_code != UNKNOWN)
18102 {
18103 /* notl op1 (if needed)
18104 sarl $31, op1
18105 andl (cf-ct), op1
18106 addl ct, op1
18107
18108 For x < 0 (resp. x <= -1) there will be no notl,
18109 so if possible swap the constants to get rid of the
18110 complement.
18111 True/false will be -1/0 while code below (store flag
18112 followed by decrement) is 0/-1, so the constants need
18113 to be exchanged once more. */
18114
18115 if (compare_code == GE || !cf)
18116 {
18117 code = reverse_condition (code);
18118 compare_code = LT;
18119 }
18120 else
18121 {
18122 HOST_WIDE_INT tmp = cf;
18123 cf = ct;
18124 ct = tmp;
18125 }
18126
18127 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18128 }
18129 else
18130 {
18131 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18132
18133 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18134 constm1_rtx,
18135 copy_rtx (out), 1, OPTAB_DIRECT);
18136 }
18137
18138 out = expand_simple_binop (mode, AND, copy_rtx (out),
18139 gen_int_mode (cf - ct, mode),
18140 copy_rtx (out), 1, OPTAB_DIRECT);
18141 if (ct)
18142 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18143 copy_rtx (out), 1, OPTAB_DIRECT);
18144 if (!rtx_equal_p (out, operands[0]))
18145 emit_move_insn (operands[0], copy_rtx (out));
18146
18147 return true;
18148 }
18149 }
18150
18151 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18152 {
18153 /* Try a few things more with specific constants and a variable. */
18154
18155 optab op;
18156 rtx var, orig_out, out, tmp;
18157
18158 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18159 return false;
18160
18161 /* If one of the two operands is an interesting constant, load a
18162 constant using the code above and mask it in with a logical operation. */
18163
18164 if (CONST_INT_P (operands[2]))
18165 {
18166 var = operands[3];
18167 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18168 operands[3] = constm1_rtx, op = and_optab;
18169 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18170 operands[3] = const0_rtx, op = ior_optab;
18171 else
18172 return false;
18173 }
18174 else if (CONST_INT_P (operands[3]))
18175 {
18176 var = operands[2];
18177 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18178 operands[2] = constm1_rtx, op = and_optab;
18179 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18180 operands[2] = const0_rtx, op = ior_optab;
18181 else
18182 return false;
18183 }
18184 else
18185 return false;
18186
18187 orig_out = operands[0];
18188 tmp = gen_reg_rtx (mode);
18189 operands[0] = tmp;
18190
18191 /* Recurse to get the constant loaded. */
18192 if (ix86_expand_int_movcc (operands) == 0)
18193 return false;
18194
18195 /* Mask in the interesting variable. */
18196 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18197 OPTAB_WIDEN);
18198 if (!rtx_equal_p (out, orig_out))
18199 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18200
18201 return true;
18202 }
18203
18204 /*
18205 * For comparison with above,
18206 *
18207 * movl cf,dest
18208 * movl ct,tmp
18209 * cmpl op1,op2
18210 * cmovcc tmp,dest
18211 *
18212 * Size 15.
18213 */
18214
18215 if (! nonimmediate_operand (operands[2], mode))
18216 operands[2] = force_reg (mode, operands[2]);
18217 if (! nonimmediate_operand (operands[3], mode))
18218 operands[3] = force_reg (mode, operands[3]);
18219
18220 if (! register_operand (operands[2], VOIDmode)
18221 && (mode == QImode
18222 || ! register_operand (operands[3], VOIDmode)))
18223 operands[2] = force_reg (mode, operands[2]);
18224
18225 if (mode == QImode
18226 && ! register_operand (operands[3], VOIDmode))
18227 operands[3] = force_reg (mode, operands[3]);
18228
18229 emit_insn (compare_seq);
18230 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18231 gen_rtx_IF_THEN_ELSE (mode,
18232 compare_op, operands[2],
18233 operands[3])));
18234 return true;
18235 }
18236
18237 /* Swap, force into registers, or otherwise massage the two operands
18238 to an sse comparison with a mask result. Thus we differ a bit from
18239 ix86_prepare_fp_compare_args which expects to produce a flags result.
18240
18241 The DEST operand exists to help determine whether to commute commutative
18242 operators. The POP0/POP1 operands are updated in place. The new
18243 comparison code is returned, or UNKNOWN if not implementable. */
18244
18245 static enum rtx_code
18246 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18247 rtx *pop0, rtx *pop1)
18248 {
18249 rtx tmp;
18250
18251 switch (code)
18252 {
18253 case LTGT:
18254 case UNEQ:
18255 /* We have no LTGT as an operator. We could implement it with
18256 NE & ORDERED, but this requires an extra temporary. It's
18257 not clear that it's worth it. */
18258 return UNKNOWN;
18259
18260 case LT:
18261 case LE:
18262 case UNGT:
18263 case UNGE:
18264 /* These are supported directly. */
18265 break;
18266
18267 case EQ:
18268 case NE:
18269 case UNORDERED:
18270 case ORDERED:
18271 /* For commutative operators, try to canonicalize the destination
18272 operand to be first in the comparison - this helps reload to
18273 avoid extra moves. */
18274 if (!dest || !rtx_equal_p (dest, *pop1))
18275 break;
18276 /* FALLTHRU */
18277
18278 case GE:
18279 case GT:
18280 case UNLE:
18281 case UNLT:
18282 /* These are not supported directly. Swap the comparison operands
18283 to transform into something that is supported. */
18284 tmp = *pop0;
18285 *pop0 = *pop1;
18286 *pop1 = tmp;
18287 code = swap_condition (code);
18288 break;
18289
18290 default:
18291 gcc_unreachable ();
18292 }
18293
18294 return code;
18295 }
18296
18297 /* Detect conditional moves that exactly match min/max operational
18298 semantics. Note that this is IEEE safe, as long as we don't
18299 interchange the operands.
18300
18301 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18302 and TRUE if the operation is successful and instructions are emitted. */
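/* For example, "dest = (a < b) ? a : b" is a MIN and "dest = (a < b) ? b : a"
   is a MAX.  Only when both flag_finite_math_only and
   flag_unsafe_math_optimizations are set do these become plain SMIN/SMAX;
   otherwise the IEEE-preserving UNSPEC forms below are used.  */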
18303
18304 static bool
18305 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18306 rtx cmp_op1, rtx if_true, rtx if_false)
18307 {
18308 enum machine_mode mode;
18309 bool is_min;
18310 rtx tmp;
18311
18312 if (code == LT)
18313 ;
18314 else if (code == UNGE)
18315 {
18316 tmp = if_true;
18317 if_true = if_false;
18318 if_false = tmp;
18319 }
18320 else
18321 return false;
18322
18323 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18324 is_min = true;
18325 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18326 is_min = false;
18327 else
18328 return false;
18329
18330 mode = GET_MODE (dest);
18331
18332 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18333 but MODE may be a vector mode and thus not appropriate. */
18334 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18335 {
18336 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18337 rtvec v;
18338
18339 if_true = force_reg (mode, if_true);
18340 v = gen_rtvec (2, if_true, if_false);
18341 tmp = gen_rtx_UNSPEC (mode, v, u);
18342 }
18343 else
18344 {
18345 code = is_min ? SMIN : SMAX;
18346 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18347 }
18348
18349 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18350 return true;
18351 }
18352
18353 /* Expand an sse vector comparison. Return the register with the result. */
18354
18355 static rtx
18356 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18357 rtx op_true, rtx op_false)
18358 {
18359 enum machine_mode mode = GET_MODE (dest);
18360 rtx x;
18361
18362 cmp_op0 = force_reg (mode, cmp_op0);
18363 if (!nonimmediate_operand (cmp_op1, mode))
18364 cmp_op1 = force_reg (mode, cmp_op1);
18365
18366 if (optimize
18367 || reg_overlap_mentioned_p (dest, op_true)
18368 || reg_overlap_mentioned_p (dest, op_false))
18369 dest = gen_reg_rtx (mode);
18370
18371 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18372 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18373
18374 return dest;
18375 }
18376
18377 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18378 operations. This is used for both scalar and vector conditional moves. */
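/* Without XOP's vector conditional move, the select is built as
   dest = (cmp & op_true) | (~cmp & op_false), with the all-zero special
   cases reduced to a single AND or NOT-and-AND sequence.  */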
18379
18380 static void
18381 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18382 {
18383 enum machine_mode mode = GET_MODE (dest);
18384 rtx t2, t3, x;
18385
18386 if (op_false == CONST0_RTX (mode))
18387 {
18388 op_true = force_reg (mode, op_true);
18389 x = gen_rtx_AND (mode, cmp, op_true);
18390 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18391 }
18392 else if (op_true == CONST0_RTX (mode))
18393 {
18394 op_false = force_reg (mode, op_false);
18395 x = gen_rtx_NOT (mode, cmp);
18396 x = gen_rtx_AND (mode, x, op_false);
18397 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18398 }
18399 else if (TARGET_XOP)
18400 {
18401 rtx pcmov = gen_rtx_SET (mode, dest,
18402 gen_rtx_IF_THEN_ELSE (mode, cmp,
18403 op_true,
18404 op_false));
18405 emit_insn (pcmov);
18406 }
18407 else
18408 {
18409 op_true = force_reg (mode, op_true);
18410 op_false = force_reg (mode, op_false);
18411
18412 t2 = gen_reg_rtx (mode);
18413 if (optimize)
18414 t3 = gen_reg_rtx (mode);
18415 else
18416 t3 = dest;
18417
18418 x = gen_rtx_AND (mode, op_true, cmp);
18419 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18420
18421 x = gen_rtx_NOT (mode, cmp);
18422 x = gen_rtx_AND (mode, x, op_false);
18423 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18424
18425 x = gen_rtx_IOR (mode, t3, t2);
18426 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18427 }
18428 }
18429
18430 /* Expand a floating-point conditional move. Return true if successful. */
18431
18432 bool
18433 ix86_expand_fp_movcc (rtx operands[])
18434 {
18435 enum machine_mode mode = GET_MODE (operands[0]);
18436 enum rtx_code code = GET_CODE (operands[1]);
18437 rtx tmp, compare_op;
18438 rtx op0 = XEXP (operands[1], 0);
18439 rtx op1 = XEXP (operands[1], 1);
18440
18441 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18442 {
18443 enum machine_mode cmode;
18444
18445 /* Since we've no cmove for sse registers, don't force bad register
18446 allocation just to gain access to it. Deny movcc when the
18447 comparison mode doesn't match the move mode. */
18448 cmode = GET_MODE (op0);
18449 if (cmode == VOIDmode)
18450 cmode = GET_MODE (op1);
18451 if (cmode != mode)
18452 return false;
18453
18454 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18455 if (code == UNKNOWN)
18456 return false;
18457
18458 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18459 operands[2], operands[3]))
18460 return true;
18461
18462 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18463 operands[2], operands[3]);
18464 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18465 return true;
18466 }
18467
18468 /* The floating point conditional move instructions don't directly
18469 support conditions resulting from a signed integer comparison. */
18470
18471 compare_op = ix86_expand_compare (code, op0, op1);
18472 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18473 {
18474 tmp = gen_reg_rtx (QImode);
18475 ix86_expand_setcc (tmp, code, op0, op1);
18476
18477 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18478 }
18479
18480 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18481 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18482 operands[2], operands[3])));
18483
18484 return true;
18485 }
18486
18487 /* Expand a floating-point vector conditional move; a vcond operation
18488 rather than a movcc operation. */
18489
18490 bool
18491 ix86_expand_fp_vcond (rtx operands[])
18492 {
18493 enum rtx_code code = GET_CODE (operands[3]);
18494 rtx cmp;
18495
18496 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18497 &operands[4], &operands[5]);
18498 if (code == UNKNOWN)
18499 return false;
18500
18501 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18502 operands[5], operands[1], operands[2]))
18503 return true;
18504
18505 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18506 operands[1], operands[2]);
18507 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18508 return true;
18509 }
18510
18511 /* Expand a signed/unsigned integral vector conditional move. */
18512
18513 bool
18514 ix86_expand_int_vcond (rtx operands[])
18515 {
18516 enum machine_mode mode = GET_MODE (operands[0]);
18517 enum rtx_code code = GET_CODE (operands[3]);
18518 bool negate = false;
18519 rtx x, cop0, cop1;
18520
18521 cop0 = operands[4];
18522 cop1 = operands[5];
18523
18524 /* XOP supports all of the comparisons on all vector int types. */
18525 if (!TARGET_XOP)
18526 {
18527 /* Canonicalize the comparison to EQ, GT, GTU. */
18528 switch (code)
18529 {
18530 case EQ:
18531 case GT:
18532 case GTU:
18533 break;
18534
18535 case NE:
18536 case LE:
18537 case LEU:
18538 code = reverse_condition (code);
18539 negate = true;
18540 break;
18541
18542 case GE:
18543 case GEU:
18544 code = reverse_condition (code);
18545 negate = true;
18546 /* FALLTHRU */
18547
18548 case LT:
18549 case LTU:
18550 code = swap_condition (code);
18551 x = cop0, cop0 = cop1, cop1 = x;
18552 break;
18553
18554 default:
18555 gcc_unreachable ();
18556 }
18557
18558 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18559 if (mode == V2DImode)
18560 {
18561 switch (code)
18562 {
18563 case EQ:
18564 /* SSE4.1 supports EQ. */
18565 if (!TARGET_SSE4_1)
18566 return false;
18567 break;
18568
18569 case GT:
18570 case GTU:
18571 /* SSE4.2 supports GT/GTU. */
18572 if (!TARGET_SSE4_2)
18573 return false;
18574 break;
18575
18576 default:
18577 gcc_unreachable ();
18578 }
18579 }
18580
18581 /* Unsigned parallel compare is not supported by the hardware.
18582 Play some tricks to turn this into a signed comparison
18583 against 0. */
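/* For V4SImode/V2DImode, x >u y iff (x - 0x80...0) >s (y - 0x80...0),
   i.e. biasing both operands by the sign bit turns the unsigned compare
   into a signed one.  For V16QImode/V8HImode, x >u y iff the unsigned
   saturating subtraction x -us y is nonzero.  */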
18584 if (code == GTU)
18585 {
18586 cop0 = force_reg (mode, cop0);
18587
18588 switch (mode)
18589 {
18590 case V4SImode:
18591 case V2DImode:
18592 {
18593 rtx t1, t2, mask;
18594 rtx (*gen_sub3) (rtx, rtx, rtx);
18595
18596 /* Subtract (-(INT MAX) - 1) from both operands to make
18597 them signed. */
18598 mask = ix86_build_signbit_mask (mode, true, false);
18599 gen_sub3 = (mode == V4SImode
18600 ? gen_subv4si3 : gen_subv2di3);
18601 t1 = gen_reg_rtx (mode);
18602 emit_insn (gen_sub3 (t1, cop0, mask));
18603
18604 t2 = gen_reg_rtx (mode);
18605 emit_insn (gen_sub3 (t2, cop1, mask));
18606
18607 cop0 = t1;
18608 cop1 = t2;
18609 code = GT;
18610 }
18611 break;
18612
18613 case V16QImode:
18614 case V8HImode:
18615 /* Perform a parallel unsigned saturating subtraction. */
18616 x = gen_reg_rtx (mode);
18617 emit_insn (gen_rtx_SET (VOIDmode, x,
18618 gen_rtx_US_MINUS (mode, cop0, cop1)));
18619
18620 cop0 = x;
18621 cop1 = CONST0_RTX (mode);
18622 code = EQ;
18623 negate = !negate;
18624 break;
18625
18626 default:
18627 gcc_unreachable ();
18628 }
18629 }
18630 }
18631
18632 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18633 operands[1+negate], operands[2-negate]);
18634
18635 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18636 operands[2-negate]);
18637 return true;
18638 }
18639
18640 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18641 true if we should do zero extension, else sign extension. HIGH_P is
18642 true if we want the N/2 high elements, else the low elements. */
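/* With SSE4.1 this uses the pmovsx/pmovzx style extensions, shifting the
   high half down first when HIGH_P.  Without SSE4.1 the vector is instead
   interleaved with zeros (unsigned) or with its own sign mask (signed),
   the latter computed as a GT comparison of zero against the operand.  */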
18643
18644 void
18645 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18646 {
18647 enum machine_mode imode = GET_MODE (operands[1]);
18648 rtx tmp, dest;
18649
18650 if (TARGET_SSE4_1)
18651 {
18652 rtx (*unpack)(rtx, rtx);
18653
18654 switch (imode)
18655 {
18656 case V16QImode:
18657 if (unsigned_p)
18658 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18659 else
18660 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18661 break;
18662 case V8HImode:
18663 if (unsigned_p)
18664 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18665 else
18666 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18667 break;
18668 case V4SImode:
18669 if (unsigned_p)
18670 unpack = gen_sse4_1_zero_extendv2siv2di2;
18671 else
18672 unpack = gen_sse4_1_sign_extendv2siv2di2;
18673 break;
18674 default:
18675 gcc_unreachable ();
18676 }
18677
18678 if (high_p)
18679 {
18680 /* Shift higher 8 bytes to lower 8 bytes. */
18681 tmp = gen_reg_rtx (imode);
18682 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
18683 gen_lowpart (V1TImode, operands[1]),
18684 GEN_INT (64)));
18685 }
18686 else
18687 tmp = operands[1];
18688
18689 emit_insn (unpack (operands[0], tmp));
18690 }
18691 else
18692 {
18693 rtx (*unpack)(rtx, rtx, rtx);
18694
18695 switch (imode)
18696 {
18697 case V16QImode:
18698 if (high_p)
18699 unpack = gen_vec_interleave_highv16qi;
18700 else
18701 unpack = gen_vec_interleave_lowv16qi;
18702 break;
18703 case V8HImode:
18704 if (high_p)
18705 unpack = gen_vec_interleave_highv8hi;
18706 else
18707 unpack = gen_vec_interleave_lowv8hi;
18708 break;
18709 case V4SImode:
18710 if (high_p)
18711 unpack = gen_vec_interleave_highv4si;
18712 else
18713 unpack = gen_vec_interleave_lowv4si;
18714 break;
18715 default:
18716 gcc_unreachable ();
18717 }
18718
18719 dest = gen_lowpart (imode, operands[0]);
18720
18721 if (unsigned_p)
18722 tmp = force_reg (imode, CONST0_RTX (imode));
18723 else
18724 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18725 operands[1], pc_rtx, pc_rtx);
18726
18727 emit_insn (unpack (dest, operands[1], tmp));
18728 }
18729 }
18730
18731 /* Expand conditional increment or decrement using adc/sbb instructions.
18732 The default case using setcc followed by the conditional move can be
18733 done by generic code. */
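/* For example, an unsigned "x = (a < b) ? y + 1 : y" can be emitted as a
   compare that sets the carry flag followed by an add-with-carry of zero,
   and the corresponding decrement uses subtract-with-borrow, avoiding a
   setcc/cmov sequence.  */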
18734 bool
18735 ix86_expand_int_addcc (rtx operands[])
18736 {
18737 enum rtx_code code = GET_CODE (operands[1]);
18738 rtx flags;
18739 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18740 rtx compare_op;
18741 rtx val = const0_rtx;
18742 bool fpcmp = false;
18743 enum machine_mode mode;
18744 rtx op0 = XEXP (operands[1], 0);
18745 rtx op1 = XEXP (operands[1], 1);
18746
18747 if (operands[3] != const1_rtx
18748 && operands[3] != constm1_rtx)
18749 return false;
18750 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18751 return false;
18752 code = GET_CODE (compare_op);
18753
18754 flags = XEXP (compare_op, 0);
18755
18756 if (GET_MODE (flags) == CCFPmode
18757 || GET_MODE (flags) == CCFPUmode)
18758 {
18759 fpcmp = true;
18760 code = ix86_fp_compare_code_to_integer (code);
18761 }
18762
18763 if (code != LTU)
18764 {
18765 val = constm1_rtx;
18766 if (fpcmp)
18767 PUT_CODE (compare_op,
18768 reverse_condition_maybe_unordered
18769 (GET_CODE (compare_op)));
18770 else
18771 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18772 }
18773
18774 mode = GET_MODE (operands[0]);
18775
18776 /* Construct either adc or sbb insn. */
18777 if ((code == LTU) == (operands[3] == constm1_rtx))
18778 {
18779 switch (mode)
18780 {
18781 case QImode:
18782 insn = gen_subqi3_carry;
18783 break;
18784 case HImode:
18785 insn = gen_subhi3_carry;
18786 break;
18787 case SImode:
18788 insn = gen_subsi3_carry;
18789 break;
18790 case DImode:
18791 insn = gen_subdi3_carry;
18792 break;
18793 default:
18794 gcc_unreachable ();
18795 }
18796 }
18797 else
18798 {
18799 switch (mode)
18800 {
18801 case QImode:
18802 insn = gen_addqi3_carry;
18803 break;
18804 case HImode:
18805 insn = gen_addhi3_carry;
18806 break;
18807 case SImode:
18808 insn = gen_addsi3_carry;
18809 break;
18810 case DImode:
18811 insn = gen_adddi3_carry;
18812 break;
18813 default:
18814 gcc_unreachable ();
18815 }
18816 }
18817 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
18818
18819 return true;
18820 }
18821
18822
18823 /* Split OPERAND into word-sized parts, stored in PARTS.  Similar to
18824 split_double_mode, but works for floating-point parameters and
18825 non-offsettable memories.  For pushes, it returns just stack offsets;
18826 the values will be saved in the right order.  At most four parts are generated.  */
18827
18828 static int
18829 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18830 {
18831 int size;
18832
18833 if (!TARGET_64BIT)
18834 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18835 else
18836 size = (GET_MODE_SIZE (mode) + 4) / 8;
18837
18838 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18839 gcc_assert (size >= 2 && size <= 4);
18840
18841 /* Optimize constant pool references to immediates.  This is used by fp
18842 moves, which force all constants to memory to allow combining.  */
18843 if (MEM_P (operand) && MEM_READONLY_P (operand))
18844 {
18845 rtx tmp = maybe_get_pool_constant (operand);
18846 if (tmp)
18847 operand = tmp;
18848 }
18849
18850 if (MEM_P (operand) && !offsettable_memref_p (operand))
18851 {
18852 /* The only non-offsettable memories we handle are pushes.  */
18853 int ok = push_operand (operand, VOIDmode);
18854
18855 gcc_assert (ok);
18856
18857 operand = copy_rtx (operand);
18858 PUT_MODE (operand, Pmode);
18859 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18860 return size;
18861 }
18862
18863 if (GET_CODE (operand) == CONST_VECTOR)
18864 {
18865 enum machine_mode imode = int_mode_for_mode (mode);
18866 /* Caution: if we looked through a constant pool memory above,
18867 the operand may actually have a different mode now. That's
18868 ok, since we want to pun this all the way back to an integer. */
18869 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18870 gcc_assert (operand != NULL);
18871 mode = imode;
18872 }
18873
18874 if (!TARGET_64BIT)
18875 {
18876 if (mode == DImode)
18877 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18878 else
18879 {
18880 int i;
18881
18882 if (REG_P (operand))
18883 {
18884 gcc_assert (reload_completed);
18885 for (i = 0; i < size; i++)
18886 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18887 }
18888 else if (offsettable_memref_p (operand))
18889 {
18890 operand = adjust_address (operand, SImode, 0);
18891 parts[0] = operand;
18892 for (i = 1; i < size; i++)
18893 parts[i] = adjust_address (operand, SImode, 4 * i);
18894 }
18895 else if (GET_CODE (operand) == CONST_DOUBLE)
18896 {
18897 REAL_VALUE_TYPE r;
18898 long l[4];
18899
18900 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18901 switch (mode)
18902 {
18903 case TFmode:
18904 real_to_target (l, &r, mode);
18905 parts[3] = gen_int_mode (l[3], SImode);
18906 parts[2] = gen_int_mode (l[2], SImode);
18907 break;
18908 case XFmode:
18909 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18910 parts[2] = gen_int_mode (l[2], SImode);
18911 break;
18912 case DFmode:
18913 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18914 break;
18915 default:
18916 gcc_unreachable ();
18917 }
18918 parts[1] = gen_int_mode (l[1], SImode);
18919 parts[0] = gen_int_mode (l[0], SImode);
18920 }
18921 else
18922 gcc_unreachable ();
18923 }
18924 }
18925 else
18926 {
18927 if (mode == TImode)
18928 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18929 if (mode == XFmode || mode == TFmode)
18930 {
18931 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
18932 if (REG_P (operand))
18933 {
18934 gcc_assert (reload_completed);
18935 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18936 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18937 }
18938 else if (offsettable_memref_p (operand))
18939 {
18940 operand = adjust_address (operand, DImode, 0);
18941 parts[0] = operand;
18942 parts[1] = adjust_address (operand, upper_mode, 8);
18943 }
18944 else if (GET_CODE (operand) == CONST_DOUBLE)
18945 {
18946 REAL_VALUE_TYPE r;
18947 long l[4];
18948
18949 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18950 real_to_target (l, &r, mode);
18951
18952 /* Do not use shift by 32 to avoid warning on 32bit systems. */
18953 if (HOST_BITS_PER_WIDE_INT >= 64)
18954 parts[0]
18955 = gen_int_mode
18956 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18957 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18958 DImode);
18959 else
18960 parts[0] = immed_double_const (l[0], l[1], DImode);
18961
18962 if (upper_mode == SImode)
18963 parts[1] = gen_int_mode (l[2], SImode);
18964 else if (HOST_BITS_PER_WIDE_INT >= 64)
18965 parts[1]
18966 = gen_int_mode
18967 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18968 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18969 DImode);
18970 else
18971 parts[1] = immed_double_const (l[2], l[3], DImode);
18972 }
18973 else
18974 gcc_unreachable ();
18975 }
18976 }
18977
18978 return size;
18979 }
18980
18981 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
18982 All required insns are emitted by this function.  The destination parts
18983 are stored into operands 2-5 and the source parts into operands 6-9,
18984 in the order in which they must be moved.  */
18985
18986 void
18987 ix86_split_long_move (rtx operands[])
18988 {
18989 rtx part[2][4];
18990 int nparts, i, j;
18991 int push = 0;
18992 int collisions = 0;
18993 enum machine_mode mode = GET_MODE (operands[0]);
18994 bool collisionparts[4];
18995
18996 /* The DFmode expanders may ask us to move double.
18997 For a 64bit target this is a single move.  By hiding the fact
18998 here we simplify the i386.md splitters.  */
18999 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19000 {
19001 /* Optimize constant pool references to immediates.  This is used by
19002 fp moves, which force all constants to memory to allow combining.  */
19003
19004 if (MEM_P (operands[1])
19005 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19006 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19007 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19008 if (push_operand (operands[0], VOIDmode))
19009 {
19010 operands[0] = copy_rtx (operands[0]);
19011 PUT_MODE (operands[0], Pmode);
19012 }
19013 else
19014 operands[0] = gen_lowpart (DImode, operands[0]);
19015 operands[1] = gen_lowpart (DImode, operands[1]);
19016 emit_move_insn (operands[0], operands[1]);
19017 return;
19018 }
19019
19020 /* The only non-offsettable memory we handle is push. */
19021 if (push_operand (operands[0], VOIDmode))
19022 push = 1;
19023 else
19024 gcc_assert (!MEM_P (operands[0])
19025 || offsettable_memref_p (operands[0]));
19026
19027 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19028 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19029
19030 /* When emitting push, take care for source operands on the stack. */
19031 if (push && MEM_P (operands[1])
19032 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19033 {
19034 rtx src_base = XEXP (part[1][nparts - 1], 0);
19035
19036 /* Compensate for the stack decrement by 4. */
19037 if (!TARGET_64BIT && nparts == 3
19038 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19039 src_base = plus_constant (src_base, 4);
19040
19041 /* src_base refers to the stack pointer and is
19042 automatically decreased by the emitted pushes.  */
19043 for (i = 0; i < nparts; i++)
19044 part[1][i] = change_address (part[1][i],
19045 GET_MODE (part[1][i]), src_base);
19046 }
19047
19048 /* We need to do the copy in the right order in case an address register
19049 of the source overlaps the destination.  */
19050 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19051 {
19052 rtx tmp;
19053
19054 for (i = 0; i < nparts; i++)
19055 {
19056 collisionparts[i]
19057 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19058 if (collisionparts[i])
19059 collisions++;
19060 }
19061
19062 /* Collision in the middle part can be handled by reordering. */
19063 if (collisions == 1 && nparts == 3 && collisionparts [1])
19064 {
19065 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19066 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19067 }
19068 else if (collisions == 1
19069 && nparts == 4
19070 && (collisionparts [1] || collisionparts [2]))
19071 {
19072 if (collisionparts [1])
19073 {
19074 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19075 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19076 }
19077 else
19078 {
19079 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19080 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19081 }
19082 }
19083
19084 /* If there are more collisions, we can't handle it by reordering.
19085 Do an lea to the last part and use only one colliding move. */
19086 else if (collisions > 1)
19087 {
19088 rtx base;
19089
19090 collisions = 1;
19091
19092 base = part[0][nparts - 1];
19093
19094 /* Handle the case when the last part isn't valid for lea.
19095 Happens in 64-bit mode storing the 12-byte XFmode. */
19096 if (GET_MODE (base) != Pmode)
19097 base = gen_rtx_REG (Pmode, REGNO (base));
19098
19099 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19100 part[1][0] = replace_equiv_address (part[1][0], base);
19101 for (i = 1; i < nparts; i++)
19102 {
19103 tmp = plus_constant (base, UNITS_PER_WORD * i);
19104 part[1][i] = replace_equiv_address (part[1][i], tmp);
19105 }
19106 }
19107 }
19108
19109 if (push)
19110 {
19111 if (!TARGET_64BIT)
19112 {
19113 if (nparts == 3)
19114 {
19115 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19116 emit_insn (gen_addsi3 (stack_pointer_rtx,
19117 stack_pointer_rtx, GEN_INT (-4)));
19118 emit_move_insn (part[0][2], part[1][2]);
19119 }
19120 else if (nparts == 4)
19121 {
19122 emit_move_insn (part[0][3], part[1][3]);
19123 emit_move_insn (part[0][2], part[1][2]);
19124 }
19125 }
19126 else
19127 {
19128 /* In 64bit mode we don't have a 32bit push available.  If this is a
19129 register, that is OK; we will just use the larger counterpart.  We also
19130 retype memory; this comes from an attempt to avoid a REX prefix on
19131 moving the second half of a TFmode value.  */
19132 if (GET_MODE (part[1][1]) == SImode)
19133 {
19134 switch (GET_CODE (part[1][1]))
19135 {
19136 case MEM:
19137 part[1][1] = adjust_address (part[1][1], DImode, 0);
19138 break;
19139
19140 case REG:
19141 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19142 break;
19143
19144 default:
19145 gcc_unreachable ();
19146 }
19147
19148 if (GET_MODE (part[1][0]) == SImode)
19149 part[1][0] = part[1][1];
19150 }
19151 }
19152 emit_move_insn (part[0][1], part[1][1]);
19153 emit_move_insn (part[0][0], part[1][0]);
19154 return;
19155 }
19156
19157 /* Choose correct order to not overwrite the source before it is copied. */
19158 if ((REG_P (part[0][0])
19159 && REG_P (part[1][1])
19160 && (REGNO (part[0][0]) == REGNO (part[1][1])
19161 || (nparts == 3
19162 && REGNO (part[0][0]) == REGNO (part[1][2]))
19163 || (nparts == 4
19164 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19165 || (collisions > 0
19166 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19167 {
19168 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19169 {
19170 operands[2 + i] = part[0][j];
19171 operands[6 + i] = part[1][j];
19172 }
19173 }
19174 else
19175 {
19176 for (i = 0; i < nparts; i++)
19177 {
19178 operands[2 + i] = part[0][i];
19179 operands[6 + i] = part[1][i];
19180 }
19181 }
19182
19183 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19184 if (optimize_insn_for_size_p ())
19185 {
19186 for (j = 0; j < nparts - 1; j++)
19187 if (CONST_INT_P (operands[6 + j])
19188 && operands[6 + j] != const0_rtx
19189 && REG_P (operands[2 + j]))
19190 for (i = j; i < nparts - 1; i++)
19191 if (CONST_INT_P (operands[7 + i])
19192 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19193 operands[7 + i] = operands[2 + j];
19194 }
19195
19196 for (i = 0; i < nparts; i++)
19197 emit_move_insn (operands[2 + i], operands[6 + i]);
19198
19199 return;
19200 }
19201
19202 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19203 left shift by a constant, either using a single shift or
19204 a sequence of add instructions. */
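/* For example, a left shift by 2 may be emitted as two self-additions when
   two adds are no more expensive than one shift by a constant on the
   current tuning (and we are not optimizing for size).  */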
19205
19206 static void
19207 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19208 {
19209 rtx (*insn)(rtx, rtx, rtx);
19210
19211 if (count == 1
19212 || (count * ix86_cost->add <= ix86_cost->shift_const
19213 && !optimize_insn_for_size_p ()))
19214 {
19215 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19216 while (count-- > 0)
19217 emit_insn (insn (operand, operand, operand));
19218 }
19219 else
19220 {
19221 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19222 emit_insn (insn (operand, operand, GEN_INT (count)));
19223 }
19224 }
19225
19226 void
19227 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19228 {
19229 rtx (*gen_ashl3)(rtx, rtx, rtx);
19230 rtx (*gen_shld)(rtx, rtx, rtx);
19231 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19232
19233 rtx low[2], high[2];
19234 int count;
19235
19236 if (CONST_INT_P (operands[2]))
19237 {
19238 split_double_mode (mode, operands, 2, low, high);
19239 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19240
19241 if (count >= half_width)
19242 {
19243 emit_move_insn (high[0], low[1]);
19244 emit_move_insn (low[0], const0_rtx);
19245
19246 if (count > half_width)
19247 ix86_expand_ashl_const (high[0], count - half_width, mode);
19248 }
19249 else
19250 {
19251 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19252
19253 if (!rtx_equal_p (operands[0], operands[1]))
19254 emit_move_insn (operands[0], operands[1]);
19255
19256 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19257 ix86_expand_ashl_const (low[0], count, mode);
19258 }
19259 return;
19260 }
19261
19262 split_double_mode (mode, operands, 1, low, high);
19263
19264 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19265
19266 if (operands[1] == const1_rtx)
19267 {
19268 /* Assuming we've chosen QImode-capable registers, 1 << N can be done
19269 with two 32/64-bit shifts, no branches, no cmoves.  */
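/* This relies on x86 32/64-bit shifts masking their count to 5/6 bits:
   the code below places a single 1 bit into whichever half is selected
   by bit 5 (or 6) of the count (via setcc on QImode-capable registers,
   or via a bit extract otherwise), and the final half-mode shifts by the
   full count then behave as shifts by N mod 32 (or 64).  */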
19270 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19271 {
19272 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19273
19274 ix86_expand_clear (low[0]);
19275 ix86_expand_clear (high[0]);
19276 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19277
19278 d = gen_lowpart (QImode, low[0]);
19279 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19280 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19281 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19282
19283 d = gen_lowpart (QImode, high[0]);
19284 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19285 s = gen_rtx_NE (QImode, flags, const0_rtx);
19286 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19287 }
19288
19289 /* Otherwise, we can get the same results by manually performing
19290 a bit extract operation on bit 5/6, and then performing the two
19291 shifts. The two methods of getting 0/1 into low/high are exactly
19292 the same size. Avoiding the shift in the bit extract case helps
19293 pentium4 a bit; no one else seems to care much either way. */
19294 else
19295 {
19296 enum machine_mode half_mode;
19297 rtx (*gen_lshr3)(rtx, rtx, rtx);
19298 rtx (*gen_and3)(rtx, rtx, rtx);
19299 rtx (*gen_xor3)(rtx, rtx, rtx);
19300 HOST_WIDE_INT bits;
19301 rtx x;
19302
19303 if (mode == DImode)
19304 {
19305 half_mode = SImode;
19306 gen_lshr3 = gen_lshrsi3;
19307 gen_and3 = gen_andsi3;
19308 gen_xor3 = gen_xorsi3;
19309 bits = 5;
19310 }
19311 else
19312 {
19313 half_mode = DImode;
19314 gen_lshr3 = gen_lshrdi3;
19315 gen_and3 = gen_anddi3;
19316 gen_xor3 = gen_xordi3;
19317 bits = 6;
19318 }
19319
19320 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19321 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19322 else
19323 x = gen_lowpart (half_mode, operands[2]);
19324 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19325
19326 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19327 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19328 emit_move_insn (low[0], high[0]);
19329 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19330 }
19331
19332 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19333 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19334 return;
19335 }
19336
19337 if (operands[1] == constm1_rtx)
19338 {
19339 /* For -1 << N, we can avoid the shld instruction, because we
19340 know that we're shifting 0...31/63 ones into a -1. */
19341 emit_move_insn (low[0], constm1_rtx);
19342 if (optimize_insn_for_size_p ())
19343 emit_move_insn (high[0], low[0]);
19344 else
19345 emit_move_insn (high[0], constm1_rtx);
19346 }
19347 else
19348 {
19349 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19350
19351 if (!rtx_equal_p (operands[0], operands[1]))
19352 emit_move_insn (operands[0], operands[1]);
19353
19354 split_double_mode (mode, operands, 1, low, high);
19355 emit_insn (gen_shld (high[0], low[0], operands[2]));
19356 }
19357
19358 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19359
19360 if (TARGET_CMOVE && scratch)
19361 {
19362 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19363 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19364
19365 ix86_expand_clear (scratch);
19366 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19367 }
19368 else
19369 {
19370 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19371 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19372
19373 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19374 }
19375 }
19376
19377 void
19378 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19379 {
19380 rtx (*gen_ashr3)(rtx, rtx, rtx)
19381 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19382 rtx (*gen_shrd)(rtx, rtx, rtx);
19383 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19384
19385 rtx low[2], high[2];
19386 int count;
19387
19388 if (CONST_INT_P (operands[2]))
19389 {
19390 split_double_mode (mode, operands, 2, low, high);
19391 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19392
19393 if (count == GET_MODE_BITSIZE (mode) - 1)
19394 {
19395 emit_move_insn (high[0], high[1]);
19396 emit_insn (gen_ashr3 (high[0], high[0],
19397 GEN_INT (half_width - 1)));
19398 emit_move_insn (low[0], high[0]);
19399
19400 }
19401 else if (count >= half_width)
19402 {
19403 emit_move_insn (low[0], high[1]);
19404 emit_move_insn (high[0], low[0]);
19405 emit_insn (gen_ashr3 (high[0], high[0],
19406 GEN_INT (half_width - 1)));
19407
19408 if (count > half_width)
19409 emit_insn (gen_ashr3 (low[0], low[0],
19410 GEN_INT (count - half_width)));
19411 }
19412 else
19413 {
19414 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19415
19416 if (!rtx_equal_p (operands[0], operands[1]))
19417 emit_move_insn (operands[0], operands[1]);
19418
19419 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19420 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19421 }
19422 }
19423 else
19424 {
19425 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19426
19427 if (!rtx_equal_p (operands[0], operands[1]))
19428 emit_move_insn (operands[0], operands[1]);
19429
19430 split_double_mode (mode, operands, 1, low, high);
19431
19432 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19433 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19434
19435 if (TARGET_CMOVE && scratch)
19436 {
19437 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19438 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19439
19440 emit_move_insn (scratch, high[0]);
19441 emit_insn (gen_ashr3 (scratch, scratch,
19442 GEN_INT (half_width - 1)));
19443 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19444 scratch));
19445 }
19446 else
19447 {
19448 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19449 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19450
19451 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19452 }
19453 }
19454 }
19455
19456 void
19457 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19458 {
19459 rtx (*gen_lshr3)(rtx, rtx, rtx)
19460 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19461 rtx (*gen_shrd)(rtx, rtx, rtx);
19462 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19463
19464 rtx low[2], high[2];
19465 int count;
19466
19467 if (CONST_INT_P (operands[2]))
19468 {
19469 split_double_mode (mode, operands, 2, low, high);
19470 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19471
19472 if (count >= half_width)
19473 {
19474 emit_move_insn (low[0], high[1]);
19475 ix86_expand_clear (high[0]);
19476
19477 if (count > half_width)
19478 emit_insn (gen_lshr3 (low[0], low[0],
19479 GEN_INT (count - half_width)));
19480 }
19481 else
19482 {
19483 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19484
19485 if (!rtx_equal_p (operands[0], operands[1]))
19486 emit_move_insn (operands[0], operands[1]);
19487
19488 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19489 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19490 }
19491 }
19492 else
19493 {
19494 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19495
19496 if (!rtx_equal_p (operands[0], operands[1]))
19497 emit_move_insn (operands[0], operands[1]);
19498
19499 split_double_mode (mode, operands, 1, low, high);
19500
19501 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19502 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19503
19504 if (TARGET_CMOVE && scratch)
19505 {
19506 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19507 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19508
19509 ix86_expand_clear (scratch);
19510 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19511 scratch));
19512 }
19513 else
19514 {
19515 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19516 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19517
19518 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19519 }
19520 }
19521 }
19522
19523 /* Predict the just-emitted jump instruction to be taken with probability PROB.  */
19524 static void
19525 predict_jump (int prob)
19526 {
19527 rtx insn = get_last_insn ();
19528 gcc_assert (JUMP_P (insn));
19529 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19530 }
19531
19532 /* Helper function for the string operations below.  Test VARIABLE for the
19533 bits in VALUE; if they are all clear, jump to the returned label.  */
19534 static rtx
19535 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19536 {
19537 rtx label = gen_label_rtx ();
19538 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19539 if (GET_MODE (variable) == DImode)
19540 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19541 else
19542 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19543 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19544 1, label);
19545 if (epilogue)
19546 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19547 else
19548 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19549 return label;
19550 }
19551
19552 /* Decrease COUNTREG by VALUE.  */
19553 static void
19554 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19555 {
19556 rtx (*gen_add)(rtx, rtx, rtx)
19557 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19558
19559 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19560 }
19561
19562 /* Zero extend possibly SImode EXP to Pmode register. */
19563 rtx
19564 ix86_zero_extend_to_Pmode (rtx exp)
19565 {
19566 rtx r;
19567 if (GET_MODE (exp) == VOIDmode)
19568 return force_reg (Pmode, exp);
19569 if (GET_MODE (exp) == Pmode)
19570 return copy_to_mode_reg (Pmode, exp);
19571 r = gen_reg_rtx (Pmode);
19572 emit_insn (gen_zero_extendsidi2 (r, exp));
19573 return r;
19574 }
19575
19576 /* Divide COUNTREG by SCALE. */
19577 static rtx
19578 scale_counter (rtx countreg, int scale)
19579 {
19580 rtx sc;
19581
19582 if (scale == 1)
19583 return countreg;
19584 if (CONST_INT_P (countreg))
19585 return GEN_INT (INTVAL (countreg) / scale);
19586 gcc_assert (REG_P (countreg));
19587
19588 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19589 GEN_INT (exact_log2 (scale)),
19590 NULL, 1, OPTAB_DIRECT);
19591 return sc;
19592 }
19593
19594 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19595 DImode for constant loop counts. */
19596
19597 static enum machine_mode
19598 counter_mode (rtx count_exp)
19599 {
19600 if (GET_MODE (count_exp) != VOIDmode)
19601 return GET_MODE (count_exp);
19602 if (!CONST_INT_P (count_exp))
19603 return Pmode;
19604 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19605 return DImode;
19606 return SImode;
19607 }
19608
19609 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
19610 to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times;
19611 the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
19612 output the equivalent loop to set memory to VALUE (assumed to be in MODE).
19613
19614 The size is rounded down to a whole number of chunks moved at once.
19615 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info.  */
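/* On exit DESTPTR (and SRCPTR, if used) have been advanced past the bytes
   actually processed, so the caller's epilogue only has to handle the
   remaining COUNT modulo (UNROLL * chunk size) bytes.  */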
19616
19617
19618 static void
19619 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19620 rtx destptr, rtx srcptr, rtx value,
19621 rtx count, enum machine_mode mode, int unroll,
19622 int expected_size)
19623 {
19624 rtx out_label, top_label, iter, tmp;
19625 enum machine_mode iter_mode = counter_mode (count);
19626 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19627 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19628 rtx size;
19629 rtx x_addr;
19630 rtx y_addr;
19631 int i;
19632
19633 top_label = gen_label_rtx ();
19634 out_label = gen_label_rtx ();
19635 iter = gen_reg_rtx (iter_mode);
19636
19637 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19638 NULL, 1, OPTAB_DIRECT);
19639 /* Those two should combine. */
19640 if (piece_size == const1_rtx)
19641 {
19642 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19643 true, out_label);
19644 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19645 }
19646 emit_move_insn (iter, const0_rtx);
19647
19648 emit_label (top_label);
19649
19650 tmp = convert_modes (Pmode, iter_mode, iter, true);
19651 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19652 destmem = change_address (destmem, mode, x_addr);
19653
19654 if (srcmem)
19655 {
19656 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19657 srcmem = change_address (srcmem, mode, y_addr);
19658
19659 /* When unrolling for chips that reorder memory reads and writes,
19660 we can save registers by using a single temporary.
19661 Also, using 4 temporaries is overkill in 32bit mode.  */
19662 if (!TARGET_64BIT && 0)
19663 {
19664 for (i = 0; i < unroll; i++)
19665 {
19666 if (i)
19667 {
19668 destmem =
19669 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19670 srcmem =
19671 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19672 }
19673 emit_move_insn (destmem, srcmem);
19674 }
19675 }
19676 else
19677 {
19678 rtx tmpreg[4];
19679 gcc_assert (unroll <= 4);
19680 for (i = 0; i < unroll; i++)
19681 {
19682 tmpreg[i] = gen_reg_rtx (mode);
19683 if (i)
19684 {
19685 srcmem =
19686 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19687 }
19688 emit_move_insn (tmpreg[i], srcmem);
19689 }
19690 for (i = 0; i < unroll; i++)
19691 {
19692 if (i)
19693 {
19694 destmem =
19695 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19696 }
19697 emit_move_insn (destmem, tmpreg[i]);
19698 }
19699 }
19700 }
19701 else
19702 for (i = 0; i < unroll; i++)
19703 {
19704 if (i)
19705 destmem =
19706 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19707 emit_move_insn (destmem, value);
19708 }
19709
19710 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19711 true, OPTAB_LIB_WIDEN);
19712 if (tmp != iter)
19713 emit_move_insn (iter, tmp);
19714
19715 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19716 true, top_label);
19717 if (expected_size != -1)
19718 {
19719 expected_size /= GET_MODE_SIZE (mode) * unroll;
19720 if (expected_size == 0)
19721 predict_jump (0);
19722 else if (expected_size > REG_BR_PROB_BASE)
19723 predict_jump (REG_BR_PROB_BASE - 1);
19724 else
19725 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19726 }
19727 else
19728 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19729 iter = ix86_zero_extend_to_Pmode (iter);
19730 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19731 true, OPTAB_LIB_WIDEN);
19732 if (tmp != destptr)
19733 emit_move_insn (destptr, tmp);
19734 if (srcptr)
19735 {
19736 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19737 true, OPTAB_LIB_WIDEN);
19738 if (tmp != srcptr)
19739 emit_move_insn (srcptr, tmp);
19740 }
19741 emit_label (out_label);
19742 }
19743
19744 /* Output a "rep; mov" instruction.
19745 Arguments have the same meaning as for the previous function.  */
19746 static void
19747 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19748 rtx destptr, rtx srcptr,
19749 rtx count,
19750 enum machine_mode mode)
19751 {
19752 rtx destexp;
19753 rtx srcexp;
19754 rtx countreg;
19755 HOST_WIDE_INT rounded_count;
19756
19757 /* If the size is known and divisible by 4, use SImode (rep movsd) rather than QImode (rep movsb); it is shorter.  */
19758 if (mode == QImode && CONST_INT_P (count)
19759 && !(INTVAL (count) & 3))
19760 mode = SImode;
19761
19762 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19763 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19764 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19765 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19766 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19767 if (mode != QImode)
19768 {
19769 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19770 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19771 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19772 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19773 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19774 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19775 }
19776 else
19777 {
19778 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19779 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19780 }
19781 if (CONST_INT_P (count))
19782 {
19783 rounded_count = (INTVAL (count)
19784 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19785 destmem = shallow_copy_rtx (destmem);
19786 srcmem = shallow_copy_rtx (srcmem);
19787 set_mem_size (destmem, rounded_count);
19788 set_mem_size (srcmem, rounded_count);
19789 }
19790 else
19791 {
19792 if (MEM_SIZE_KNOWN_P (destmem))
19793 clear_mem_size (destmem);
19794 if (MEM_SIZE_KNOWN_P (srcmem))
19795 clear_mem_size (srcmem);
19796 }
19797 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19798 destexp, srcexp));
19799 }
19800
19801 /* Output a "rep; stos" instruction.
19802 Arguments have the same meaning as for the previous function.  */
19803 static void
19804 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19805 rtx count, enum machine_mode mode,
19806 rtx orig_value)
19807 {
19808 rtx destexp;
19809 rtx countreg;
19810 HOST_WIDE_INT rounded_count;
19811
19812 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19813 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19814 value = force_reg (mode, gen_lowpart (mode, value));
19815 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19816 if (mode != QImode)
19817 {
19818 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19819 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19820 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19821 }
19822 else
19823 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19824 if (orig_value == const0_rtx && CONST_INT_P (count))
19825 {
19826 rounded_count = (INTVAL (count)
19827 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19828 destmem = shallow_copy_rtx (destmem);
19829 set_mem_size (destmem, rounded_count);
19830 }
19831 else if (MEM_SIZE_KNOWN_P (destmem))
19832 clear_mem_size (destmem);
19833 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19834 }
19835
19836 static void
19837 emit_strmov (rtx destmem, rtx srcmem,
19838 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19839 {
19840 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19841 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19842 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19843 }
19844
19845 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
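/* For a constant COUNT the residue is copied with a fixed sequence of
   16/8/4/2/1 byte moves selected by the low bits of COUNT; for a variable
   COUNT we either loop (when the residue can exceed 8 bytes) or test the
   individual bits of COUNT and conditionally copy each piece.  */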
19846 static void
19847 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19848 rtx destptr, rtx srcptr, rtx count, int max_size)
19849 {
19850 rtx src, dest;
19851 if (CONST_INT_P (count))
19852 {
19853 HOST_WIDE_INT countval = INTVAL (count);
19854 int offset = 0;
19855
19856 if ((countval & 0x10) && max_size > 16)
19857 {
19858 if (TARGET_64BIT)
19859 {
19860 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19861 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19862 }
19863 else
19864 gcc_unreachable ();
19865 offset += 16;
19866 }
19867 if ((countval & 0x08) && max_size > 8)
19868 {
19869 if (TARGET_64BIT)
19870 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19871 else
19872 {
19873 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19874 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19875 }
19876 offset += 8;
19877 }
19878 if ((countval & 0x04) && max_size > 4)
19879 {
19880 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19881 offset += 4;
19882 }
19883 if ((countval & 0x02) && max_size > 2)
19884 {
19885 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19886 offset += 2;
19887 }
19888 if ((countval & 0x01) && max_size > 1)
19889 {
19890 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19891 offset += 1;
19892 }
19893 return;
19894 }
19895 if (max_size > 8)
19896 {
19897 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19898 count, 1, OPTAB_DIRECT);
19899 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19900 count, QImode, 1, 4);
19901 return;
19902 }
19903
19904 /* When single string operations are preferred (TARGET_SINGLE_STRINGOP), we
19905 can cheaply advance the dest and src pointers with movs.  Otherwise we save
19906 code size by maintaining an offset (zero is readily available from the
19907 preceding rep operation) and using x86 addressing modes.  */
19908 if (TARGET_SINGLE_STRINGOP)
19909 {
19910 if (max_size > 4)
19911 {
19912 rtx label = ix86_expand_aligntest (count, 4, true);
19913 src = change_address (srcmem, SImode, srcptr);
19914 dest = change_address (destmem, SImode, destptr);
19915 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19916 emit_label (label);
19917 LABEL_NUSES (label) = 1;
19918 }
19919 if (max_size > 2)
19920 {
19921 rtx label = ix86_expand_aligntest (count, 2, true);
19922 src = change_address (srcmem, HImode, srcptr);
19923 dest = change_address (destmem, HImode, destptr);
19924 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19925 emit_label (label);
19926 LABEL_NUSES (label) = 1;
19927 }
19928 if (max_size > 1)
19929 {
19930 rtx label = ix86_expand_aligntest (count, 1, true);
19931 src = change_address (srcmem, QImode, srcptr);
19932 dest = change_address (destmem, QImode, destptr);
19933 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19934 emit_label (label);
19935 LABEL_NUSES (label) = 1;
19936 }
19937 }
19938 else
19939 {
19940 rtx offset = force_reg (Pmode, const0_rtx);
19941 rtx tmp;
19942
19943 if (max_size > 4)
19944 {
19945 rtx label = ix86_expand_aligntest (count, 4, true);
19946 src = change_address (srcmem, SImode, srcptr);
19947 dest = change_address (destmem, SImode, destptr);
19948 emit_move_insn (dest, src);
19949 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19950 true, OPTAB_LIB_WIDEN);
19951 if (tmp != offset)
19952 emit_move_insn (offset, tmp);
19953 emit_label (label);
19954 LABEL_NUSES (label) = 1;
19955 }
19956 if (max_size > 2)
19957 {
19958 rtx label = ix86_expand_aligntest (count, 2, true);
19959 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19960 src = change_address (srcmem, HImode, tmp);
19961 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19962 dest = change_address (destmem, HImode, tmp);
19963 emit_move_insn (dest, src);
19964 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19965 true, OPTAB_LIB_WIDEN);
19966 if (tmp != offset)
19967 emit_move_insn (offset, tmp);
19968 emit_label (label);
19969 LABEL_NUSES (label) = 1;
19970 }
19971 if (max_size > 1)
19972 {
19973 rtx label = ix86_expand_aligntest (count, 1, true);
19974 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19975 src = change_address (srcmem, QImode, tmp);
19976 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19977 dest = change_address (destmem, QImode, tmp);
19978 emit_move_insn (dest, src);
19979 emit_label (label);
19980 LABEL_NUSES (label) = 1;
19981 }
19982 }
19983 }
19984
19985 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
19986 static void
19987 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19988 rtx count, int max_size)
19989 {
19990 count =
19991 expand_simple_binop (counter_mode (count), AND, count,
19992 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19993 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19994 gen_lowpart (QImode, value), count, QImode,
19995 1, max_size / 2);
19996 }
19997
19998 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
19999 static void
20000 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20001 {
20002 rtx dest;
20003
20004 if (CONST_INT_P (count))
20005 {
20006 HOST_WIDE_INT countval = INTVAL (count);
20007 int offset = 0;
20008
20009 if ((countval & 0x10) && max_size > 16)
20010 {
20011 if (TARGET_64BIT)
20012 {
20013 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20014 emit_insn (gen_strset (destptr, dest, value));
20015 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20016 emit_insn (gen_strset (destptr, dest, value));
20017 }
20018 else
20019 gcc_unreachable ();
20020 offset += 16;
20021 }
20022 if ((countval & 0x08) && max_size > 8)
20023 {
20024 if (TARGET_64BIT)
20025 {
20026 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20027 emit_insn (gen_strset (destptr, dest, value));
20028 }
20029 else
20030 {
20031 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20032 emit_insn (gen_strset (destptr, dest, value));
20033 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20034 emit_insn (gen_strset (destptr, dest, value));
20035 }
20036 offset += 8;
20037 }
20038 if ((countval & 0x04) && max_size > 4)
20039 {
20040 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20041 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20042 offset += 4;
20043 }
20044 if ((countval & 0x02) && max_size > 2)
20045 {
20046 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20047 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20048 offset += 2;
20049 }
20050 if ((countval & 0x01) && max_size > 1)
20051 {
20052 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20053 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20054 offset += 1;
20055 }
20056 return;
20057 }
20058 if (max_size > 32)
20059 {
20060 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20061 return;
20062 }
20063 if (max_size > 16)
20064 {
20065 rtx label = ix86_expand_aligntest (count, 16, true);
20066 if (TARGET_64BIT)
20067 {
20068 dest = change_address (destmem, DImode, destptr);
20069 emit_insn (gen_strset (destptr, dest, value));
20070 emit_insn (gen_strset (destptr, dest, value));
20071 }
20072 else
20073 {
20074 dest = change_address (destmem, SImode, destptr);
20075 emit_insn (gen_strset (destptr, dest, value));
20076 emit_insn (gen_strset (destptr, dest, value));
20077 emit_insn (gen_strset (destptr, dest, value));
20078 emit_insn (gen_strset (destptr, dest, value));
20079 }
20080 emit_label (label);
20081 LABEL_NUSES (label) = 1;
20082 }
20083 if (max_size > 8)
20084 {
20085 rtx label = ix86_expand_aligntest (count, 8, true);
20086 if (TARGET_64BIT)
20087 {
20088 dest = change_address (destmem, DImode, destptr);
20089 emit_insn (gen_strset (destptr, dest, value));
20090 }
20091 else
20092 {
20093 dest = change_address (destmem, SImode, destptr);
20094 emit_insn (gen_strset (destptr, dest, value));
20095 emit_insn (gen_strset (destptr, dest, value));
20096 }
20097 emit_label (label);
20098 LABEL_NUSES (label) = 1;
20099 }
20100 if (max_size > 4)
20101 {
20102 rtx label = ix86_expand_aligntest (count, 4, true);
20103 dest = change_address (destmem, SImode, destptr);
20104 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20105 emit_label (label);
20106 LABEL_NUSES (label) = 1;
20107 }
20108 if (max_size > 2)
20109 {
20110 rtx label = ix86_expand_aligntest (count, 2, true);
20111 dest = change_address (destmem, HImode, destptr);
20112 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20113 emit_label (label);
20114 LABEL_NUSES (label) = 1;
20115 }
20116 if (max_size > 1)
20117 {
20118 rtx label = ix86_expand_aligntest (count, 1, true);
20119 dest = change_address (destmem, QImode, destptr);
20120 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20121 emit_label (label);
20122 LABEL_NUSES (label) = 1;
20123 }
20124 }
20125
20126 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
20127 to DESIRED_ALIGNMENT.  */
20128 static void
20129 expand_movmem_prologue (rtx destmem, rtx srcmem,
20130 rtx destptr, rtx srcptr, rtx count,
20131 int align, int desired_alignment)
20132 {
20133 if (align <= 1 && desired_alignment > 1)
20134 {
20135 rtx label = ix86_expand_aligntest (destptr, 1, false);
20136 srcmem = change_address (srcmem, QImode, srcptr);
20137 destmem = change_address (destmem, QImode, destptr);
20138 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20139 ix86_adjust_counter (count, 1);
20140 emit_label (label);
20141 LABEL_NUSES (label) = 1;
20142 }
20143 if (align <= 2 && desired_alignment > 2)
20144 {
20145 rtx label = ix86_expand_aligntest (destptr, 2, false);
20146 srcmem = change_address (srcmem, HImode, srcptr);
20147 destmem = change_address (destmem, HImode, destptr);
20148 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20149 ix86_adjust_counter (count, 2);
20150 emit_label (label);
20151 LABEL_NUSES (label) = 1;
20152 }
20153 if (align <= 4 && desired_alignment > 4)
20154 {
20155 rtx label = ix86_expand_aligntest (destptr, 4, false);
20156 srcmem = change_address (srcmem, SImode, srcptr);
20157 destmem = change_address (destmem, SImode, destptr);
20158 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20159 ix86_adjust_counter (count, 4);
20160 emit_label (label);
20161 LABEL_NUSES (label) = 1;
20162 }
20163 gcc_assert (desired_alignment <= 8);
20164 }
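
/* As an illustration only: at run time the jump-tree prologue emitted above
   behaves roughly like the plain C below.  This sketch is not part of GCC and
   is compiled out; the helper name and the assumed desired alignment of 8
   exist only for this example.  */
#if 0
static void
movmem_prologue_sketch (unsigned char **dstp, const unsigned char **srcp,
                        size_t *countp)
{
  unsigned char *dst = *dstp;
  const unsigned char *src = *srcp;

  /* Copy 1, 2 and 4 bytes as needed until DST is 8-byte aligned,
     adjusting the remaining count after each step.  */
  if (((size_t) dst & 1) != 0)
    {
      *dst++ = *src++;
      *countp -= 1;
    }
  if (((size_t) dst & 2) != 0)
    {
      dst[0] = src[0], dst[1] = src[1];
      dst += 2, src += 2;
      *countp -= 2;
    }
  if (((size_t) dst & 4) != 0)
    {
      dst[0] = src[0], dst[1] = src[1], dst[2] = src[2], dst[3] = src[3];
      dst += 4, src += 4;
      *countp -= 4;
    }

  *dstp = dst;
  *srcp = src;
}
#endif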
20165
20166 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
20167 ALIGN_BYTES is how many bytes need to be copied.  */
20168 static rtx
20169 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20170 int desired_align, int align_bytes)
20171 {
20172 rtx src = *srcp;
20173 rtx orig_dst = dst;
20174 rtx orig_src = src;
20175 int off = 0;
20176 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20177 if (src_align_bytes >= 0)
20178 src_align_bytes = desired_align - src_align_bytes;
20179 if (align_bytes & 1)
20180 {
20181 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20182 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20183 off = 1;
20184 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20185 }
20186 if (align_bytes & 2)
20187 {
20188 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20189 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20190 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20191 set_mem_align (dst, 2 * BITS_PER_UNIT);
20192 if (src_align_bytes >= 0
20193 && (src_align_bytes & 1) == (align_bytes & 1)
20194 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20195 set_mem_align (src, 2 * BITS_PER_UNIT);
20196 off = 2;
20197 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20198 }
20199 if (align_bytes & 4)
20200 {
20201 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20202 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20203 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20204 set_mem_align (dst, 4 * BITS_PER_UNIT);
20205 if (src_align_bytes >= 0)
20206 {
20207 unsigned int src_align = 0;
20208 if ((src_align_bytes & 3) == (align_bytes & 3))
20209 src_align = 4;
20210 else if ((src_align_bytes & 1) == (align_bytes & 1))
20211 src_align = 2;
20212 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20213 set_mem_align (src, src_align * BITS_PER_UNIT);
20214 }
20215 off = 4;
20216 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20217 }
20218 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20219 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20220 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20221 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20222 if (src_align_bytes >= 0)
20223 {
20224 unsigned int src_align = 0;
20225 if ((src_align_bytes & 7) == (align_bytes & 7))
20226 src_align = 8;
20227 else if ((src_align_bytes & 3) == (align_bytes & 3))
20228 src_align = 4;
20229 else if ((src_align_bytes & 1) == (align_bytes & 1))
20230 src_align = 2;
20231 if (src_align > (unsigned int) desired_align)
20232 src_align = desired_align;
20233 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20234 set_mem_align (src, src_align * BITS_PER_UNIT);
20235 }
20236 if (MEM_SIZE_KNOWN_P (orig_dst))
20237 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20238 if (MEM_SIZE_KNOWN_P (orig_src))
20239 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
20240 *srcp = src;
20241 return dst;
20242 }
20243
20244 /* Store enough into DEST to align DEST, known to be aligned by ALIGN,
20245 to DESIRED_ALIGNMENT.  */
20246 static void
20247 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20248 int align, int desired_alignment)
20249 {
20250 if (align <= 1 && desired_alignment > 1)
20251 {
20252 rtx label = ix86_expand_aligntest (destptr, 1, false);
20253 destmem = change_address (destmem, QImode, destptr);
20254 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20255 ix86_adjust_counter (count, 1);
20256 emit_label (label);
20257 LABEL_NUSES (label) = 1;
20258 }
20259 if (align <= 2 && desired_alignment > 2)
20260 {
20261 rtx label = ix86_expand_aligntest (destptr, 2, false);
20262 destmem = change_address (destmem, HImode, destptr);
20263 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20264 ix86_adjust_counter (count, 2);
20265 emit_label (label);
20266 LABEL_NUSES (label) = 1;
20267 }
20268 if (align <= 4 && desired_alignment > 4)
20269 {
20270 rtx label = ix86_expand_aligntest (destptr, 4, false);
20271 destmem = change_address (destmem, SImode, destptr);
20272 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20273 ix86_adjust_counter (count, 4);
20274 emit_label (label);
20275 LABEL_NUSES (label) = 1;
20276 }
20277 gcc_assert (desired_alignment <= 8);
20278 }
20279
20280 /* Store enough into DST to align DST to DESIRED_ALIGN.
20281 ALIGN_BYTES is how many bytes need to be stored.  */
20282 static rtx
20283 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20284 int desired_align, int align_bytes)
20285 {
20286 int off = 0;
20287 rtx orig_dst = dst;
20288 if (align_bytes & 1)
20289 {
20290 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20291 off = 1;
20292 emit_insn (gen_strset (destreg, dst,
20293 gen_lowpart (QImode, value)));
20294 }
20295 if (align_bytes & 2)
20296 {
20297 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20298 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20299 set_mem_align (dst, 2 * BITS_PER_UNIT);
20300 off = 2;
20301 emit_insn (gen_strset (destreg, dst,
20302 gen_lowpart (HImode, value)));
20303 }
20304 if (align_bytes & 4)
20305 {
20306 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20307 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20308 set_mem_align (dst, 4 * BITS_PER_UNIT);
20309 off = 4;
20310 emit_insn (gen_strset (destreg, dst,
20311 gen_lowpart (SImode, value)));
20312 }
20313 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20314 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20315 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20316 if (MEM_SIZE_KNOWN_P (orig_dst))
20317 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20318 return dst;
20319 }
20320
20321 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20322 static enum stringop_alg
20323 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20324 int *dynamic_check)
20325 {
20326 const struct stringop_algs * algs;
20327 bool optimize_for_speed;
20328 /* Algorithms using the rep prefix want at least edi and ecx;
20329 additionally, memset wants eax and memcpy wants esi. Don't
20330 consider such algorithms if the user has appropriated those
20331 registers for their own purposes. */
20332 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20333 || (memset
20334 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20335
20336 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20337 || (alg != rep_prefix_1_byte \
20338 && alg != rep_prefix_4_byte \
20339 && alg != rep_prefix_8_byte))
20340 const struct processor_costs *cost;
20341
20342 /* Even if the string operation call is cold, we still might spend a lot
20343 of time processing large blocks. */
20344 if (optimize_function_for_size_p (cfun)
20345 || (optimize_insn_for_size_p ()
20346 && expected_size != -1 && expected_size < 256))
20347 optimize_for_speed = false;
20348 else
20349 optimize_for_speed = true;
20350
20351 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20352
20353 *dynamic_check = -1;
20354 if (memset)
20355 algs = &cost->memset[TARGET_64BIT != 0];
20356 else
20357 algs = &cost->memcpy[TARGET_64BIT != 0];
20358 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
20359 return ix86_stringop_alg;
20360 /* rep; movq or rep; movl is the smallest variant. */
20361 else if (!optimize_for_speed)
20362 {
20363 if (!count || (count & 3))
20364 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20365 else
20366 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20367 }
20368 /* Very tiny blocks are best handled via the loop; REP is expensive to
20369 set up.  */
20370 else if (expected_size != -1 && expected_size < 4)
20371 return loop_1_byte;
20372 else if (expected_size != -1)
20373 {
20374 unsigned int i;
20375 enum stringop_alg alg = libcall;
20376 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20377 {
20378 /* We get here if the algorithms that were not libcall-based
20379 were rep-prefix based and we are unable to use rep prefixes
20380 based on global register usage. Break out of the loop and
20381 use the heuristic below. */
20382 if (algs->size[i].max == 0)
20383 break;
20384 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20385 {
20386 enum stringop_alg candidate = algs->size[i].alg;
20387
20388 if (candidate != libcall && ALG_USABLE_P (candidate))
20389 alg = candidate;
20390 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20391 last non-libcall inline algorithm. */
20392 if (TARGET_INLINE_ALL_STRINGOPS)
20393 {
20394 /* When the current size is best copied by a libcall but we are
20395 still forced to inline, run the heuristic below that picks code
20396 for medium-sized blocks.  */
20397 if (alg != libcall)
20398 return alg;
20399 break;
20400 }
20401 else if (ALG_USABLE_P (candidate))
20402 return candidate;
20403 }
20404 }
20405 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20406 }
20407 /* When asked to inline the call anyway, try to pick a meaningful choice.
20408 We look for the maximal size of block that is faster to copy by hand and
20409 take blocks of at most that size, guessing that the average size will
20410 be roughly half of the maximum.
20411
20412 If this turns out to be bad, we might simply specify the preferred
20413 choice in ix86_costs. */
20414 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20415 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20416 {
20417 int max = -1;
20418 enum stringop_alg alg;
20419 int i;
20420 bool any_alg_usable_p = true;
20421
20422 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20423 {
20424 enum stringop_alg candidate = algs->size[i].alg;
20425 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20426
20427 if (candidate != libcall && candidate
20428 && ALG_USABLE_P (candidate))
20429 max = algs->size[i].max;
20430 }
20431 /* If there aren't any usable algorithms, then recursing on
20432 smaller sizes isn't going to find anything. Just return the
20433 simple byte-at-a-time copy loop. */
20434 if (!any_alg_usable_p)
20435 {
20436 /* Pick something reasonable. */
20437 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20438 *dynamic_check = 128;
20439 return loop_1_byte;
20440 }
20441 if (max == -1)
20442 max = 4096;
20443 alg = decide_alg (count, max / 2, memset, dynamic_check);
20444 gcc_assert (*dynamic_check == -1);
20445 gcc_assert (alg != libcall);
20446 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20447 *dynamic_check = max;
20448 return alg;
20449 }
20450 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20451 #undef ALG_USABLE_P
20452 }
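
/* As an illustration only: the per-size algorithm table consulted above can
   be pictured as the lookup below.  This sketch is not part of GCC and is
   compiled out; the thresholds and names are invented for the example (the
   real ones come from the processor cost tables).  */
#if 0
enum sketch_stringop_alg { SKETCH_LOOP, SKETCH_REP_PREFIX, SKETCH_LIBCALL };

struct sketch_stringop_entry
{
  long max;                       /* Largest block size handled, -1 = no bound.  */
  enum sketch_stringop_alg alg;
};

static enum sketch_stringop_alg
sketch_decide_alg (long expected_size)
{
  static const struct sketch_stringop_entry table[] = {
    { 24, SKETCH_LOOP },           /* Tiny blocks: plain copy loop.  */
    { 8192, SKETCH_REP_PREFIX },   /* Medium blocks: rep-prefixed string insn.  */
    { -1, SKETCH_LIBCALL }         /* Everything else: call the library.  */
  };
  size_t i;

  for (i = 0; i < sizeof table / sizeof table[0]; i++)
    if (table[i].max == -1 || expected_size <= table[i].max)
      return table[i].alg;
  return SKETCH_LIBCALL;
}
#endif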
20453
20454 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20455 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20456 static int
20457 decide_alignment (int align,
20458 enum stringop_alg alg,
20459 int expected_size)
20460 {
20461 int desired_align = 0;
20462 switch (alg)
20463 {
20464 case no_stringop:
20465 gcc_unreachable ();
20466 case loop:
20467 case unrolled_loop:
20468 desired_align = GET_MODE_SIZE (Pmode);
20469 break;
20470 case rep_prefix_8_byte:
20471 desired_align = 8;
20472 break;
20473 case rep_prefix_4_byte:
20474 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20475 copying a whole cache line at once.  */
20476 if (TARGET_PENTIUMPRO)
20477 desired_align = 8;
20478 else
20479 desired_align = 4;
20480 break;
20481 case rep_prefix_1_byte:
20482 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20483 copying a whole cache line at once.  */
20484 if (TARGET_PENTIUMPRO)
20485 desired_align = 8;
20486 else
20487 desired_align = 1;
20488 break;
20489 case loop_1_byte:
20490 desired_align = 1;
20491 break;
20492 case libcall:
20493 return 0;
20494 }
20495
20496 if (optimize_size)
20497 desired_align = 1;
20498 if (desired_align < align)
20499 desired_align = align;
20500 if (expected_size != -1 && expected_size < 4)
20501 desired_align = align;
20502 return desired_align;
20503 }
20504
20505 /* Return the smallest power of 2 greater than VAL. */
20506 static int
20507 smallest_pow2_greater_than (int val)
20508 {
20509 int ret = 1;
20510 while (ret <= val)
20511 ret <<= 1;
20512 return ret;
20513 }
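
/* For example, smallest_pow2_greater_than (7) returns 8 and
   smallest_pow2_greater_than (8) returns 16: the result is always
   strictly greater than VAL.  */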
20514
20515 /* Expand string move (memcpy) operation. Use i386 string operations
20516 when profitable. expand_setmem contains similar code. The code
20517 depends upon architecture, block size and alignment, but always has
20518 the same overall structure:
20519
20520 1) Prologue guard: a conditional that jumps to the epilogue for small
20521 blocks that can be handled by the epilogue alone.  This is faster
20522 but also needed for correctness, since the prologue assumes the block
20523 is larger than the desired alignment.
20524
20525 An optional dynamic check for size and a libcall for large
20526 blocks are emitted here too, with -minline-stringops-dynamically.
20527
20528 2) Prologue: copy first few bytes in order to get destination
20529 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
20530 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
20531 copied. We emit either a jump tree on power of two sized
20532 blocks, or a byte loop.
20533
20534 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20535 with specified algorithm.
20536
20537 4) Epilogue: code copying tail of the block that is too small to be
20538 handled by main body (or up to size guarded by prologue guard). */
20539
20540 bool
20541 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20542 rtx expected_align_exp, rtx expected_size_exp)
20543 {
20544 rtx destreg;
20545 rtx srcreg;
20546 rtx label = NULL;
20547 rtx tmp;
20548 rtx jump_around_label = NULL;
20549 HOST_WIDE_INT align = 1;
20550 unsigned HOST_WIDE_INT count = 0;
20551 HOST_WIDE_INT expected_size = -1;
20552 int size_needed = 0, epilogue_size_needed;
20553 int desired_align = 0, align_bytes = 0;
20554 enum stringop_alg alg;
20555 int dynamic_check;
20556 bool need_zero_guard = false;
20557
20558 if (CONST_INT_P (align_exp))
20559 align = INTVAL (align_exp);
20560 /* i386 can do misaligned accesses at a reasonably increased cost.  */
20561 if (CONST_INT_P (expected_align_exp)
20562 && INTVAL (expected_align_exp) > align)
20563 align = INTVAL (expected_align_exp);
20564 /* ALIGN is the minimum of destination and source alignment, but we care here
20565 just about destination alignment. */
20566 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20567 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20568
20569 if (CONST_INT_P (count_exp))
20570 count = expected_size = INTVAL (count_exp);
20571 if (CONST_INT_P (expected_size_exp) && count == 0)
20572 expected_size = INTVAL (expected_size_exp);
20573
20574 /* Make sure we don't need to care about overflow later on. */
20575 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20576 return false;
20577
20578 /* Step 0: Decide on preferred algorithm, desired alignment and
20579 size of chunks to be copied by main loop. */
20580
20581 alg = decide_alg (count, expected_size, false, &dynamic_check);
20582 desired_align = decide_alignment (align, alg, expected_size);
20583
20584 if (!TARGET_ALIGN_STRINGOPS)
20585 align = desired_align;
20586
20587 if (alg == libcall)
20588 return false;
20589 gcc_assert (alg != no_stringop);
20590 if (!count)
20591 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20592 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20593 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20594 switch (alg)
20595 {
20596 case libcall:
20597 case no_stringop:
20598 gcc_unreachable ();
20599 case loop:
20600 need_zero_guard = true;
20601 size_needed = GET_MODE_SIZE (Pmode);
20602 break;
20603 case unrolled_loop:
20604 need_zero_guard = true;
20605 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20606 break;
20607 case rep_prefix_8_byte:
20608 size_needed = 8;
20609 break;
20610 case rep_prefix_4_byte:
20611 size_needed = 4;
20612 break;
20613 case rep_prefix_1_byte:
20614 size_needed = 1;
20615 break;
20616 case loop_1_byte:
20617 need_zero_guard = true;
20618 size_needed = 1;
20619 break;
20620 }
20621
20622 epilogue_size_needed = size_needed;
20623
20624 /* Step 1: Prologue guard. */
20625
20626 /* Alignment code needs count to be in register. */
20627 if (CONST_INT_P (count_exp) && desired_align > align)
20628 {
20629 if (INTVAL (count_exp) > desired_align
20630 && INTVAL (count_exp) > size_needed)
20631 {
20632 align_bytes
20633 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20634 if (align_bytes <= 0)
20635 align_bytes = 0;
20636 else
20637 align_bytes = desired_align - align_bytes;
20638 }
20639 if (align_bytes == 0)
20640 count_exp = force_reg (counter_mode (count_exp), count_exp);
20641 }
20642 gcc_assert (desired_align >= 1 && align >= 1);
20643
20644 /* Ensure that alignment prologue won't copy past end of block. */
20645 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20646 {
20647 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20648 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20649 Make sure it is a power of 2.  */
20650 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20651
20652 if (count)
20653 {
20654 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20655 {
20656 /* If main algorithm works on QImode, no epilogue is needed.
20657 For small sizes just don't align anything. */
20658 if (size_needed == 1)
20659 desired_align = align;
20660 else
20661 goto epilogue;
20662 }
20663 }
20664 else
20665 {
20666 label = gen_label_rtx ();
20667 emit_cmp_and_jump_insns (count_exp,
20668 GEN_INT (epilogue_size_needed),
20669 LTU, 0, counter_mode (count_exp), 1, label);
20670 if (expected_size == -1 || expected_size < epilogue_size_needed)
20671 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20672 else
20673 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20674 }
20675 }
20676
20677 /* Emit code to decide at runtime whether a library call or inline code
20678 should be used.  */
20679 if (dynamic_check != -1)
20680 {
20681 if (CONST_INT_P (count_exp))
20682 {
20683 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20684 {
20685 emit_block_move_via_libcall (dst, src, count_exp, false);
20686 count_exp = const0_rtx;
20687 goto epilogue;
20688 }
20689 }
20690 else
20691 {
20692 rtx hot_label = gen_label_rtx ();
20693 jump_around_label = gen_label_rtx ();
20694 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20695 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20696 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20697 emit_block_move_via_libcall (dst, src, count_exp, false);
20698 emit_jump (jump_around_label);
20699 emit_label (hot_label);
20700 }
20701 }
20702
20703 /* Step 2: Alignment prologue. */
20704
20705 if (desired_align > align)
20706 {
20707 if (align_bytes == 0)
20708 {
20709 /* Except for the first move in the epilogue, we no longer know
20710 the constant offset in the aliasing info.  It does not seem worth
20711 the pain to maintain it for the first move, so throw away
20712 the info early.  */
20713 src = change_address (src, BLKmode, srcreg);
20714 dst = change_address (dst, BLKmode, destreg);
20715 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20716 desired_align);
20717 }
20718 else
20719 {
20720 /* If we know how many bytes need to be stored before dst is
20721 sufficiently aligned, maintain aliasing info accurately. */
20722 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20723 desired_align, align_bytes);
20724 count_exp = plus_constant (count_exp, -align_bytes);
20725 count -= align_bytes;
20726 }
20727 if (need_zero_guard
20728 && (count < (unsigned HOST_WIDE_INT) size_needed
20729 || (align_bytes == 0
20730 && count < ((unsigned HOST_WIDE_INT) size_needed
20731 + desired_align - align))))
20732 {
20733 /* It is possible that we copied enough so the main loop will not
20734 execute. */
20735 gcc_assert (size_needed > 1);
20736 if (label == NULL_RTX)
20737 label = gen_label_rtx ();
20738 emit_cmp_and_jump_insns (count_exp,
20739 GEN_INT (size_needed),
20740 LTU, 0, counter_mode (count_exp), 1, label);
20741 if (expected_size == -1
20742 || expected_size < (desired_align - align) / 2 + size_needed)
20743 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20744 else
20745 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20746 }
20747 }
20748 if (label && size_needed == 1)
20749 {
20750 emit_label (label);
20751 LABEL_NUSES (label) = 1;
20752 label = NULL;
20753 epilogue_size_needed = 1;
20754 }
20755 else if (label == NULL_RTX)
20756 epilogue_size_needed = size_needed;
20757
20758 /* Step 3: Main loop. */
20759
20760 switch (alg)
20761 {
20762 case libcall:
20763 case no_stringop:
20764 gcc_unreachable ();
20765 case loop_1_byte:
20766 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20767 count_exp, QImode, 1, expected_size);
20768 break;
20769 case loop:
20770 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20771 count_exp, Pmode, 1, expected_size);
20772 break;
20773 case unrolled_loop:
20774 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
20775 registers for 4 temporaries anyway. */
20776 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20777 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20778 expected_size);
20779 break;
20780 case rep_prefix_8_byte:
20781 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20782 DImode);
20783 break;
20784 case rep_prefix_4_byte:
20785 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20786 SImode);
20787 break;
20788 case rep_prefix_1_byte:
20789 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20790 QImode);
20791 break;
20792 }
20793 /* Properly adjust the offsets of the source and destination memory for aliasing.  */
20794 if (CONST_INT_P (count_exp))
20795 {
20796 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20797 (count / size_needed) * size_needed);
20798 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20799 (count / size_needed) * size_needed);
20800 }
20801 else
20802 {
20803 src = change_address (src, BLKmode, srcreg);
20804 dst = change_address (dst, BLKmode, destreg);
20805 }
20806
20807 /* Step 4: Epilogue to copy the remaining bytes. */
20808 epilogue:
20809 if (label)
20810 {
20811 /* When the main loop is done, COUNT_EXP might hold the original count,
20812 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20813 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20814 bytes.  Compensate if needed.  */
20815
20816 if (size_needed < epilogue_size_needed)
20817 {
20818 tmp =
20819 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20820 GEN_INT (size_needed - 1), count_exp, 1,
20821 OPTAB_DIRECT);
20822 if (tmp != count_exp)
20823 emit_move_insn (count_exp, tmp);
20824 }
20825 emit_label (label);
20826 LABEL_NUSES (label) = 1;
20827 }
20828
20829 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20830 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20831 epilogue_size_needed);
20832 if (jump_around_label)
20833 emit_label (jump_around_label);
20834 return true;
20835 }
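
/* As an illustration only: the code emitted by ix86_expand_movmem above has
   roughly the shape of the plain C below.  This sketch is not part of GCC and
   is compiled out; SIZE_NEEDED and DESIRED_ALIGN are illustrative constants,
   and the optional dynamic libcall check is omitted.  */
#if 0
static void
movmem_shape_sketch (unsigned char *dst, const unsigned char *src,
                     size_t count)
{
  enum { SIZE_NEEDED = 8, DESIRED_ALIGN = 8 };

  /* Step 1: prologue guard; small blocks go straight to the epilogue.  */
  if (count >= SIZE_NEEDED)
    {
      /* Step 2: alignment prologue.  */
      while (((size_t) dst & (DESIRED_ALIGN - 1)) != 0)
        {
          *dst++ = *src++;
          count--;
        }
      /* Step 3: main loop copying SIZE_NEEDED-byte chunks.  */
      while (count >= SIZE_NEEDED)
        {
          size_t i;
          for (i = 0; i < SIZE_NEEDED; i++)
            dst[i] = src[i];
          dst += SIZE_NEEDED, src += SIZE_NEEDED;
          count -= SIZE_NEEDED;
        }
    }
  /* Step 4: epilogue for the remaining tail.  */
  while (count > 0)
    {
      *dst++ = *src++;
      count--;
    }
}
#endif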
20836
20837 /* Helper function for memset.  For a QImode value 0xXY produce
20838 0xXYXYXYXY of the width specified by MODE.  This is essentially
20839 a * 0x01010101, but we can do slightly better than
20840 synth_mult by unwinding the sequence by hand on CPUs with
20841 a slow multiply.  */
20842 static rtx
20843 promote_duplicated_reg (enum machine_mode mode, rtx val)
20844 {
20845 enum machine_mode valmode = GET_MODE (val);
20846 rtx tmp;
20847 int nops = mode == DImode ? 3 : 2;
20848
20849 gcc_assert (mode == SImode || mode == DImode);
20850 if (val == const0_rtx)
20851 return copy_to_mode_reg (mode, const0_rtx);
20852 if (CONST_INT_P (val))
20853 {
20854 HOST_WIDE_INT v = INTVAL (val) & 255;
20855
20856 v |= v << 8;
20857 v |= v << 16;
20858 if (mode == DImode)
20859 v |= (v << 16) << 16;
20860 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20861 }
20862
20863 if (valmode == VOIDmode)
20864 valmode = QImode;
20865 if (valmode != QImode)
20866 val = gen_lowpart (QImode, val);
20867 if (mode == QImode)
20868 return val;
20869 if (!TARGET_PARTIAL_REG_STALL)
20870 nops--;
20871 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20872 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20873 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20874 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20875 {
20876 rtx reg = convert_modes (mode, QImode, val, true);
20877 tmp = promote_duplicated_reg (mode, const1_rtx);
20878 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20879 OPTAB_DIRECT);
20880 }
20881 else
20882 {
20883 rtx reg = convert_modes (mode, QImode, val, true);
20884
20885 if (!TARGET_PARTIAL_REG_STALL)
20886 if (mode == SImode)
20887 emit_insn (gen_movsi_insv_1 (reg, reg));
20888 else
20889 emit_insn (gen_movdi_insv_1 (reg, reg));
20890 else
20891 {
20892 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20893 NULL, 1, OPTAB_DIRECT);
20894 reg =
20895 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20896 }
20897 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20898 NULL, 1, OPTAB_DIRECT);
20899 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20900 if (mode == SImode)
20901 return reg;
20902 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20903 NULL, 1, OPTAB_DIRECT);
20904 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20905 return reg;
20906 }
20907 }
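
/* As an illustration only: the shift-and-or sequence emitted above replicates
   a byte the same way as the plain C below, e.g.
   replicate_byte_sketch (0xab) == 0xabababab.  The sketch is not part of GCC
   and is compiled out.  */
#if 0
static unsigned int
replicate_byte_sketch (unsigned char b)
{
  unsigned int v = b;   /* 0x000000XY */
  v |= v << 8;          /* 0x0000XYXY */
  v |= v << 16;         /* 0xXYXYXYXY */
  return v;
}
#endif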
20908
20909 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
20910 will be needed by the main loop copying SIZE_NEEDED chunks and by the
20911 prologue getting alignment from ALIGN to DESIRED_ALIGN.  */
20912 static rtx
20913 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20914 {
20915 rtx promoted_val;
20916
20917 if (TARGET_64BIT
20918 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20919 promoted_val = promote_duplicated_reg (DImode, val);
20920 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20921 promoted_val = promote_duplicated_reg (SImode, val);
20922 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20923 promoted_val = promote_duplicated_reg (HImode, val);
20924 else
20925 promoted_val = val;
20926
20927 return promoted_val;
20928 }
20929
20930 /* Expand string clear operation (bzero).  Use i386 string operations when
20931 profitable.  See the ix86_expand_movmem comment for an explanation of the
20932 individual steps performed.  */
20933 bool
20934 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20935 rtx expected_align_exp, rtx expected_size_exp)
20936 {
20937 rtx destreg;
20938 rtx label = NULL;
20939 rtx tmp;
20940 rtx jump_around_label = NULL;
20941 HOST_WIDE_INT align = 1;
20942 unsigned HOST_WIDE_INT count = 0;
20943 HOST_WIDE_INT expected_size = -1;
20944 int size_needed = 0, epilogue_size_needed;
20945 int desired_align = 0, align_bytes = 0;
20946 enum stringop_alg alg;
20947 rtx promoted_val = NULL;
20948 bool force_loopy_epilogue = false;
20949 int dynamic_check;
20950 bool need_zero_guard = false;
20951
20952 if (CONST_INT_P (align_exp))
20953 align = INTVAL (align_exp);
20954 /* i386 can do misaligned accesses at a reasonably increased cost.  */
20955 if (CONST_INT_P (expected_align_exp)
20956 && INTVAL (expected_align_exp) > align)
20957 align = INTVAL (expected_align_exp);
20958 if (CONST_INT_P (count_exp))
20959 count = expected_size = INTVAL (count_exp);
20960 if (CONST_INT_P (expected_size_exp) && count == 0)
20961 expected_size = INTVAL (expected_size_exp);
20962
20963 /* Make sure we don't need to care about overflow later on. */
20964 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20965 return false;
20966
20967 /* Step 0: Decide on preferred algorithm, desired alignment and
20968 size of chunks to be copied by main loop. */
20969
20970 alg = decide_alg (count, expected_size, true, &dynamic_check);
20971 desired_align = decide_alignment (align, alg, expected_size);
20972
20973 if (!TARGET_ALIGN_STRINGOPS)
20974 align = desired_align;
20975
20976 if (alg == libcall)
20977 return false;
20978 gcc_assert (alg != no_stringop);
20979 if (!count)
20980 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20981 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20982 switch (alg)
20983 {
20984 case libcall:
20985 case no_stringop:
20986 gcc_unreachable ();
20987 case loop:
20988 need_zero_guard = true;
20989 size_needed = GET_MODE_SIZE (Pmode);
20990 break;
20991 case unrolled_loop:
20992 need_zero_guard = true;
20993 size_needed = GET_MODE_SIZE (Pmode) * 4;
20994 break;
20995 case rep_prefix_8_byte:
20996 size_needed = 8;
20997 break;
20998 case rep_prefix_4_byte:
20999 size_needed = 4;
21000 break;
21001 case rep_prefix_1_byte:
21002 size_needed = 1;
21003 break;
21004 case loop_1_byte:
21005 need_zero_guard = true;
21006 size_needed = 1;
21007 break;
21008 }
21009 epilogue_size_needed = size_needed;
21010
21011 /* Step 1: Prologue guard. */
21012
21013 /* Alignment code needs count to be in register. */
21014 if (CONST_INT_P (count_exp) && desired_align > align)
21015 {
21016 if (INTVAL (count_exp) > desired_align
21017 && INTVAL (count_exp) > size_needed)
21018 {
21019 align_bytes
21020 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21021 if (align_bytes <= 0)
21022 align_bytes = 0;
21023 else
21024 align_bytes = desired_align - align_bytes;
21025 }
21026 if (align_bytes == 0)
21027 {
21028 enum machine_mode mode = SImode;
21029 if (TARGET_64BIT && (count & ~0xffffffff))
21030 mode = DImode;
21031 count_exp = force_reg (mode, count_exp);
21032 }
21033 }
21034 /* Do the cheap promotion to allow better CSE across the
21035 main loop and epilogue (i.e. one load of the big constant in
21036 front of all the code).  */
21037 if (CONST_INT_P (val_exp))
21038 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21039 desired_align, align);
21040 /* Ensure that alignment prologue won't copy past end of block. */
21041 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21042 {
21043 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21044 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21045 Make sure it is power of 2. */
21046 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21047
21048 /* To improve performance on small blocks, we jump around the VAL
21049 promoting code.  This means that if the promoted VAL is not constant,
21050 we might not use it in the epilogue and have to fall back to the byte
21051 loop variant.  */
21052 if (epilogue_size_needed > 2 && !promoted_val)
21053 force_loopy_epilogue = true;
21054 if (count)
21055 {
21056 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21057 {
21058 /* If main algorithm works on QImode, no epilogue is needed.
21059 For small sizes just don't align anything. */
21060 if (size_needed == 1)
21061 desired_align = align;
21062 else
21063 goto epilogue;
21064 }
21065 }
21066 else
21067 {
21068 label = gen_label_rtx ();
21069 emit_cmp_and_jump_insns (count_exp,
21070 GEN_INT (epilogue_size_needed),
21071 LTU, 0, counter_mode (count_exp), 1, label);
21072 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21073 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21074 else
21075 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21076 }
21077 }
21078 if (dynamic_check != -1)
21079 {
21080 rtx hot_label = gen_label_rtx ();
21081 jump_around_label = gen_label_rtx ();
21082 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21083 LEU, 0, counter_mode (count_exp), 1, hot_label);
21084 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21085 set_storage_via_libcall (dst, count_exp, val_exp, false);
21086 emit_jump (jump_around_label);
21087 emit_label (hot_label);
21088 }
21089
21090 /* Step 2: Alignment prologue. */
21091
21092 /* Do the expensive promotion once we branched off the small blocks. */
21093 if (!promoted_val)
21094 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21095 desired_align, align);
21096 gcc_assert (desired_align >= 1 && align >= 1);
21097
21098 if (desired_align > align)
21099 {
21100 if (align_bytes == 0)
21101 {
21102 /* Except for the first move in the epilogue, we no longer know
21103 the constant offset in the aliasing info.  It does not seem worth
21104 the pain to maintain it for the first move, so throw away
21105 the info early.  */
21106 dst = change_address (dst, BLKmode, destreg);
21107 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21108 desired_align);
21109 }
21110 else
21111 {
21112 /* If we know how many bytes need to be stored before dst is
21113 sufficiently aligned, maintain aliasing info accurately. */
21114 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21115 desired_align, align_bytes);
21116 count_exp = plus_constant (count_exp, -align_bytes);
21117 count -= align_bytes;
21118 }
21119 if (need_zero_guard
21120 && (count < (unsigned HOST_WIDE_INT) size_needed
21121 || (align_bytes == 0
21122 && count < ((unsigned HOST_WIDE_INT) size_needed
21123 + desired_align - align))))
21124 {
21125 /* It is possible that we copied enough so the main loop will not
21126 execute. */
21127 gcc_assert (size_needed > 1);
21128 if (label == NULL_RTX)
21129 label = gen_label_rtx ();
21130 emit_cmp_and_jump_insns (count_exp,
21131 GEN_INT (size_needed),
21132 LTU, 0, counter_mode (count_exp), 1, label);
21133 if (expected_size == -1
21134 || expected_size < (desired_align - align) / 2 + size_needed)
21135 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21136 else
21137 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21138 }
21139 }
21140 if (label && size_needed == 1)
21141 {
21142 emit_label (label);
21143 LABEL_NUSES (label) = 1;
21144 label = NULL;
21145 promoted_val = val_exp;
21146 epilogue_size_needed = 1;
21147 }
21148 else if (label == NULL_RTX)
21149 epilogue_size_needed = size_needed;
21150
21151 /* Step 3: Main loop. */
21152
21153 switch (alg)
21154 {
21155 case libcall:
21156 case no_stringop:
21157 gcc_unreachable ();
21158 case loop_1_byte:
21159 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21160 count_exp, QImode, 1, expected_size);
21161 break;
21162 case loop:
21163 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21164 count_exp, Pmode, 1, expected_size);
21165 break;
21166 case unrolled_loop:
21167 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21168 count_exp, Pmode, 4, expected_size);
21169 break;
21170 case rep_prefix_8_byte:
21171 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21172 DImode, val_exp);
21173 break;
21174 case rep_prefix_4_byte:
21175 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21176 SImode, val_exp);
21177 break;
21178 case rep_prefix_1_byte:
21179 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21180 QImode, val_exp);
21181 break;
21182 }
21183 /* Properly adjust the offset of the destination memory for aliasing.  */
21184 if (CONST_INT_P (count_exp))
21185 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21186 (count / size_needed) * size_needed);
21187 else
21188 dst = change_address (dst, BLKmode, destreg);
21189
21190 /* Step 4: Epilogue to copy the remaining bytes. */
21191
21192 if (label)
21193 {
21194 /* When the main loop is done, COUNT_EXP might hold the original count,
21195 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21196 The epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21197 bytes.  Compensate if needed.  */
21198
21199 if (size_needed < epilogue_size_needed)
21200 {
21201 tmp =
21202 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21203 GEN_INT (size_needed - 1), count_exp, 1,
21204 OPTAB_DIRECT);
21205 if (tmp != count_exp)
21206 emit_move_insn (count_exp, tmp);
21207 }
21208 emit_label (label);
21209 LABEL_NUSES (label) = 1;
21210 }
21211 epilogue:
21212 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21213 {
21214 if (force_loopy_epilogue)
21215 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21216 epilogue_size_needed);
21217 else
21218 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21219 epilogue_size_needed);
21220 }
21221 if (jump_around_label)
21222 emit_label (jump_around_label);
21223 return true;
21224 }
21225
21226 /* Expand the appropriate insns for doing strlen if not just doing
21227 repnz; scasb
21228
21229 out = result, initialized with the start address
21230 align_rtx = alignment of the address.
21231 scratch = scratch register, initialized with the start address when
21232 not aligned, otherwise undefined
21233
21234 This is just the body. It needs the initializations mentioned above and
21235 some address computing at the end. These things are done in i386.md. */
21236
21237 static void
21238 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21239 {
21240 int align;
21241 rtx tmp;
21242 rtx align_2_label = NULL_RTX;
21243 rtx align_3_label = NULL_RTX;
21244 rtx align_4_label = gen_label_rtx ();
21245 rtx end_0_label = gen_label_rtx ();
21246 rtx mem;
21247 rtx tmpreg = gen_reg_rtx (SImode);
21248 rtx scratch = gen_reg_rtx (SImode);
21249 rtx cmp;
21250
21251 align = 0;
21252 if (CONST_INT_P (align_rtx))
21253 align = INTVAL (align_rtx);
21254
21255 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21256
21257 /* Is there a known alignment and is it less than 4? */
21258 if (align < 4)
21259 {
21260 rtx scratch1 = gen_reg_rtx (Pmode);
21261 emit_move_insn (scratch1, out);
21262 /* Is there a known alignment and is it not 2? */
21263 if (align != 2)
21264 {
21265 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21266 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21267
21268 /* Leave just the 3 lower bits. */
21269 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21270 NULL_RTX, 0, OPTAB_WIDEN);
21271
21272 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21273 Pmode, 1, align_4_label);
21274 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21275 Pmode, 1, align_2_label);
21276 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21277 Pmode, 1, align_3_label);
21278 }
21279 else
21280 {
21281 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21282 check whether it is aligned to a 4-byte boundary.  */
21283
21284 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21285 NULL_RTX, 0, OPTAB_WIDEN);
21286
21287 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21288 Pmode, 1, align_4_label);
21289 }
21290
21291 mem = change_address (src, QImode, out);
21292
21293 /* Now compare the bytes. */
21294
21295 /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
21296 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21297 QImode, 1, end_0_label);
21298
21299 /* Increment the address. */
21300 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21301
21302 /* Not needed with an alignment of 2 */
21303 if (align != 2)
21304 {
21305 emit_label (align_2_label);
21306
21307 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21308 end_0_label);
21309
21310 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21311
21312 emit_label (align_3_label);
21313 }
21314
21315 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21316 end_0_label);
21317
21318 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21319 }
21320
21321 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
21322 align this loop; doing so only makes the program larger and does not
21323 help to speed it up.  */
21324 emit_label (align_4_label);
21325
21326 mem = change_address (src, SImode, out);
21327 emit_move_insn (scratch, mem);
21328 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21329
21330 /* This formula yields a nonzero result iff one of the bytes is zero.
21331 This saves three branches inside the loop and many cycles.  */
21332
21333 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21334 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21335 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21336 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21337 gen_int_mode (0x80808080, SImode)));
21338 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21339 align_4_label);
21340
21341 if (TARGET_CMOVE)
21342 {
21343 rtx reg = gen_reg_rtx (SImode);
21344 rtx reg2 = gen_reg_rtx (Pmode);
21345 emit_move_insn (reg, tmpreg);
21346 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21347
21348 /* If zero is not in the first two bytes, move two bytes forward. */
21349 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21350 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21351 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21352 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21353 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21354 reg,
21355 tmpreg)));
21356 /* Emit lea manually to avoid clobbering of flags. */
21357 emit_insn (gen_rtx_SET (SImode, reg2,
21358 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21359
21360 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21361 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21362 emit_insn (gen_rtx_SET (VOIDmode, out,
21363 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21364 reg2,
21365 out)));
21366 }
21367 else
21368 {
21369 rtx end_2_label = gen_label_rtx ();
21370 /* Is zero in the first two bytes? */
21371
21372 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21373 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21374 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21375 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21376 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21377 pc_rtx);
21378 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21379 JUMP_LABEL (tmp) = end_2_label;
21380
21381 /* Not in the first two. Move two bytes forward. */
21382 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21383 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21384
21385 emit_label (end_2_label);
21386
21387 }
21388
21389 /* Avoid branch in fixing the byte. */
21390 tmpreg = gen_lowpart (QImode, tmpreg);
21391 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21392 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21393 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21394 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21395
21396 emit_label (end_0_label);
21397 }
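
/* As an illustration only: the branch-free zero-byte test used in the aligned
   loop above corresponds to the plain C below.  For the word 0x41004242 the
   expression evaluates to 0x00800000, i.e. nonzero, because the word contains
   a zero byte.  The sketch is not part of GCC and is compiled out.  */
#if 0
static int
word_has_zero_byte_sketch (unsigned int w)
{
  /* Classic "haszero" idiom: the result is nonzero exactly when some
     byte of W is zero.  */
  return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
}
#endif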
21398
21399 /* Expand strlen. */
21400
21401 bool
21402 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21403 {
21404 rtx addr, scratch1, scratch2, scratch3, scratch4;
21405
21406 /* The generic case of the strlen expander is long.  Avoid expanding it
21407 unless TARGET_INLINE_ALL_STRINGOPS.  */
21408
21409 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21410 && !TARGET_INLINE_ALL_STRINGOPS
21411 && !optimize_insn_for_size_p ()
21412 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21413 return false;
21414
21415 addr = force_reg (Pmode, XEXP (src, 0));
21416 scratch1 = gen_reg_rtx (Pmode);
21417
21418 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21419 && !optimize_insn_for_size_p ())
21420 {
21421 /* Well, it seems that some optimizer does not combine a call like
21422 foo(strlen(bar), strlen(bar));
21423 when the move and the subtraction are done here.  It does calculate
21424 the length just once when these instructions are done inside
21425 output_strlen_unroll().  But since &bar[strlen(bar)] is
21426 often used and this uses one fewer register for the lifetime of
21427 output_strlen_unroll(), this is better.
21428
21429 emit_move_insn (out, addr);
21430
21431 ix86_expand_strlensi_unroll_1 (out, src, align);
21432
21433 /* strlensi_unroll_1 returns the address of the zero at the end of
21434 the string, like memchr(), so compute the length by subtracting
21435 the start address. */
21436 emit_insn (ix86_gen_sub3 (out, out, addr));
21437 }
21438 else
21439 {
21440 rtx unspec;
21441
21442 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21443 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21444 return false;
21445
21446 scratch2 = gen_reg_rtx (Pmode);
21447 scratch3 = gen_reg_rtx (Pmode);
21448 scratch4 = force_reg (Pmode, constm1_rtx);
21449
21450 emit_move_insn (scratch3, addr);
21451 eoschar = force_reg (QImode, eoschar);
21452
21453 src = replace_equiv_address_nv (src, scratch3);
21454
21455 /* If .md starts supporting :P, this can be done in .md. */
21456 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21457 scratch4), UNSPEC_SCAS);
21458 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21459 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21460 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
21461 }
21462 return true;
21463 }
21464
21465 /* For a given symbol (function), construct code to compute the address of its
21466 PLT entry in the large x86-64 PIC model.  */
21467 rtx
21468 construct_plt_address (rtx symbol)
21469 {
21470 rtx tmp = gen_reg_rtx (Pmode);
21471 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21472
21473 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21474 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21475
21476 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21477 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21478 return tmp;
21479 }
21480
21481 rtx
21482 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21483 rtx callarg2,
21484 rtx pop, bool sibcall)
21485 {
21486 rtx use = NULL, call;
21487
21488 if (pop == const0_rtx)
21489 pop = NULL;
21490 gcc_assert (!TARGET_64BIT || !pop);
21491
21492 if (TARGET_MACHO && !TARGET_64BIT)
21493 {
21494 #if TARGET_MACHO
21495 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21496 fnaddr = machopic_indirect_call_target (fnaddr);
21497 #endif
21498 }
21499 else
21500 {
21501 /* Static functions and indirect calls don't need the pic register. */
21502 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21503 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21504 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21505 use_reg (&use, pic_offset_table_rtx);
21506 }
21507
21508 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21509 {
21510 rtx al = gen_rtx_REG (QImode, AX_REG);
21511 emit_move_insn (al, callarg2);
21512 use_reg (&use, al);
21513 }
21514
21515 if (ix86_cmodel == CM_LARGE_PIC
21516 && MEM_P (fnaddr)
21517 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21518 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21519 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21520 else if (sibcall
21521 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21522 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21523 {
21524 fnaddr = XEXP (fnaddr, 0);
21525 if (GET_MODE (fnaddr) != Pmode)
21526 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
21527 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
21528 }
21529
21530 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21531 if (retval)
21532 call = gen_rtx_SET (VOIDmode, retval, call);
21533 if (pop)
21534 {
21535 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21536 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21537 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21538 }
21539 if (TARGET_64BIT_MS_ABI
21540 && (!callarg2 || INTVAL (callarg2) != -2))
21541 {
21542 /* We need to represent that SI and DI registers are clobbered
21543 by SYSV calls. */
21544 static int clobbered_registers[] = {
21545 XMM6_REG, XMM7_REG, XMM8_REG,
21546 XMM9_REG, XMM10_REG, XMM11_REG,
21547 XMM12_REG, XMM13_REG, XMM14_REG,
21548 XMM15_REG, SI_REG, DI_REG
21549 };
21550 unsigned int i;
21551 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21552 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21553 UNSPEC_MS_TO_SYSV_CALL);
21554
21555 vec[0] = call;
21556 vec[1] = unspec;
21557 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21558 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21559 ? TImode : DImode,
21560 gen_rtx_REG
21561 (SSE_REGNO_P (clobbered_registers[i])
21562 ? TImode : DImode,
21563 clobbered_registers[i]));
21564
21565 call = gen_rtx_PARALLEL (VOIDmode,
21566 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
21567 + 2, vec));
21568 }
21569
21570 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21571 if (TARGET_VZEROUPPER)
21572 {
21573 rtx unspec;
21574 int avx256;
21575
21576 if (cfun->machine->callee_pass_avx256_p)
21577 {
21578 if (cfun->machine->callee_return_avx256_p)
21579 avx256 = callee_return_pass_avx256;
21580 else
21581 avx256 = callee_pass_avx256;
21582 }
21583 else if (cfun->machine->callee_return_avx256_p)
21584 avx256 = callee_return_avx256;
21585 else
21586 avx256 = call_no_avx256;
21587
21588 if (reload_completed)
21589 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21590 else
21591 {
21592 unspec = gen_rtx_UNSPEC (VOIDmode,
21593 gen_rtvec (1, GEN_INT (avx256)),
21594 UNSPEC_CALL_NEEDS_VZEROUPPER);
21595 call = gen_rtx_PARALLEL (VOIDmode,
21596 gen_rtvec (2, call, unspec));
21597 }
21598 }
21599
21600 call = emit_call_insn (call);
21601 if (use)
21602 CALL_INSN_FUNCTION_USAGE (call) = use;
21603
21604 return call;
21605 }
21606
21607 void
21608 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21609 {
21610 rtx call = XVECEXP (PATTERN (insn), 0, 0);
21611 emit_insn (gen_avx_vzeroupper (vzeroupper));
21612 emit_call_insn (call);
21613 }
21614
21615 /* Output the assembly for a call instruction. */
21616
21617 const char *
21618 ix86_output_call_insn (rtx insn, rtx call_op)
21619 {
21620 bool direct_p = constant_call_address_operand (call_op, Pmode);
21621 bool seh_nop_p = false;
21622 const char *xasm;
21623
21624 if (SIBLING_CALL_P (insn))
21625 {
21626 if (direct_p)
21627 xasm = "jmp\t%P0";
21628 /* SEH epilogue detection requires the indirect branch case
21629 to include REX.W. */
21630 else if (TARGET_SEH)
21631 xasm = "rex.W jmp %A0";
21632 else
21633 xasm = "jmp\t%A0";
21634
21635 output_asm_insn (xasm, &call_op);
21636 return "";
21637 }
21638
21639 /* SEH unwinding can require an extra nop to be emitted in several
21640 circumstances. Determine if we have one of those. */
21641 if (TARGET_SEH)
21642 {
21643 rtx i;
21644
21645 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21646 {
21647 /* If we get to another real insn, we don't need the nop. */
21648 if (INSN_P (i))
21649 break;
21650
21651 /* If we get to the epilogue note, prevent a catch region from
21652 being adjacent to the standard epilogue sequence.  If non-call
21653 exceptions are enabled, we'll have done this during epilogue emission.  */
21654 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21655 && !flag_non_call_exceptions
21656 && !can_throw_internal (insn))
21657 {
21658 seh_nop_p = true;
21659 break;
21660 }
21661 }
21662
21663 /* If we didn't find a real insn following the call, prevent the
21664 unwinder from looking into the next function. */
21665 if (i == NULL)
21666 seh_nop_p = true;
21667 }
21668
21669 if (direct_p)
21670 xasm = "call\t%P0";
21671 else
21672 xasm = "call\t%A0";
21673
21674 output_asm_insn (xasm, &call_op);
21675
21676 if (seh_nop_p)
21677 return "nop";
21678
21679 return "";
21680 }
21681 \f
21682 /* Clear stack slot assignments remembered from previous functions.
21683 This is called from INIT_EXPANDERS once before RTL is emitted for each
21684 function. */
21685
21686 static struct machine_function *
21687 ix86_init_machine_status (void)
21688 {
21689 struct machine_function *f;
21690
21691 f = ggc_alloc_cleared_machine_function ();
21692 f->use_fast_prologue_epilogue_nregs = -1;
21693 f->tls_descriptor_call_expanded_p = 0;
21694 f->call_abi = ix86_abi;
21695
21696 return f;
21697 }
21698
21699 /* Return a MEM corresponding to a stack slot with mode MODE.
21700 Allocate a new slot if necessary.
21701
21702 The RTL for a function can have several slots available: N is
21703 which slot to use. */
21704
21705 rtx
21706 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
21707 {
21708 struct stack_local_entry *s;
21709
21710 gcc_assert (n < MAX_386_STACK_LOCALS);
21711
21712 /* Virtual slot is valid only before vregs are instantiated. */
21713 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
21714
21715 for (s = ix86_stack_locals; s; s = s->next)
21716 if (s->mode == mode && s->n == n)
21717 return copy_rtx (s->rtl);
21718
21719 s = ggc_alloc_stack_local_entry ();
21720 s->n = n;
21721 s->mode = mode;
21722 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21723
21724 s->next = ix86_stack_locals;
21725 ix86_stack_locals = s;
21726 return s->rtl;
21727 }
21728 \f
21729 /* Calculate the length of the memory address in the instruction
21730 encoding. Does not include the one-byte modrm, opcode, or prefix. */
21731
21732 int
21733 memory_address_length (rtx addr)
21734 {
21735 struct ix86_address parts;
21736 rtx base, index, disp;
21737 int len;
21738 int ok;
21739
21740 if (GET_CODE (addr) == PRE_DEC
21741 || GET_CODE (addr) == POST_INC
21742 || GET_CODE (addr) == PRE_MODIFY
21743 || GET_CODE (addr) == POST_MODIFY)
21744 return 0;
21745
21746 ok = ix86_decompose_address (addr, &parts);
21747 gcc_assert (ok);
21748
21749 if (parts.base && GET_CODE (parts.base) == SUBREG)
21750 parts.base = SUBREG_REG (parts.base);
21751 if (parts.index && GET_CODE (parts.index) == SUBREG)
21752 parts.index = SUBREG_REG (parts.index);
21753
21754 base = parts.base;
21755 index = parts.index;
21756 disp = parts.disp;
21757 len = 0;
21758
21759 /* Rule of thumb:
21760 - esp as the base always wants an index,
21761 - ebp as the base always wants a displacement,
21762 - r12 as the base always wants an index,
21763 - r13 as the base always wants a displacement. */
21764
21765 /* Register Indirect. */
21766 if (base && !index && !disp)
21767 {
21768 /* esp (for its index) and ebp (for its displacement) need
21769 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
21770 code. */
21771 if (REG_P (addr)
21772 && (addr == arg_pointer_rtx
21773 || addr == frame_pointer_rtx
21774 || REGNO (addr) == SP_REG
21775 || REGNO (addr) == BP_REG
21776 || REGNO (addr) == R12_REG
21777 || REGNO (addr) == R13_REG))
21778 len = 1;
21779 }
21780
21781 /* Direct addressing.  In 64-bit mode, mod 00 r/m 5
21782 is not disp32 but disp32(%rip), so a SIB byte is
21783 needed for disp32, unless print_operand_address
21784 optimizes it into disp32(%rip) or (%rip) is implied
21785 by an UNSPEC.  */
21786 else if (disp && !base && !index)
21787 {
21788 len = 4;
21789 if (TARGET_64BIT)
21790 {
21791 rtx symbol = disp;
21792
21793 if (GET_CODE (disp) == CONST)
21794 symbol = XEXP (disp, 0);
21795 if (GET_CODE (symbol) == PLUS
21796 && CONST_INT_P (XEXP (symbol, 1)))
21797 symbol = XEXP (symbol, 0);
21798
21799 if (GET_CODE (symbol) != LABEL_REF
21800 && (GET_CODE (symbol) != SYMBOL_REF
21801 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21802 && (GET_CODE (symbol) != UNSPEC
21803 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21804 && XINT (symbol, 1) != UNSPEC_PCREL
21805 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21806 len += 1;
21807 }
21808 }
21809
21810 else
21811 {
21812 /* Find the length of the displacement constant. */
21813 if (disp)
21814 {
21815 if (base && satisfies_constraint_K (disp))
21816 len = 1;
21817 else
21818 len = 4;
21819 }
21820 /* ebp always wants a displacement. Similarly r13. */
21821 else if (base && REG_P (base)
21822 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21823 len = 1;
21824
21825 /* An index requires the two-byte modrm form.... */
21826 if (index
21827 /* ...like esp (or r12), which always wants an index. */
21828 || base == arg_pointer_rtx
21829 || base == frame_pointer_rtx
21830 || (base && REG_P (base)
21831 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
21832 len += 1;
21833 }
21834
21835 switch (parts.seg)
21836 {
21837 case SEG_FS:
21838 case SEG_GS:
21839 len += 1;
21840 break;
21841 default:
21842 break;
21843 }
21844
21845 return len;
21846 }
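/* A worked example (illustrative, assuming 32-bit code): for the address
   4(%esp), ix86_decompose_address yields base = %esp, index = NULL and
   disp = 4.  The displacement fits in 8 bits, so len starts at 1, and
   because the base is %esp a SIB byte is required, so len becomes 2:
   one disp8 byte plus one SIB byte, on top of the modrm/opcode/prefix
   bytes that this function deliberately does not count.  */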
21847
21848 /* Compute default value for "length_immediate" attribute. When SHORTFORM
21849 is set, expect that the insn has an 8-bit immediate alternative. */
21850 int
21851 ix86_attr_length_immediate_default (rtx insn, bool shortform)
21852 {
21853 int len = 0;
21854 int i;
21855 extract_insn_cached (insn);
21856 for (i = recog_data.n_operands - 1; i >= 0; --i)
21857 if (CONSTANT_P (recog_data.operand[i]))
21858 {
21859 enum attr_mode mode = get_attr_mode (insn);
21860
21861 gcc_assert (!len);
21862 if (shortform && CONST_INT_P (recog_data.operand[i]))
21863 {
21864 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21865 switch (mode)
21866 {
21867 case MODE_QI:
21868 len = 1;
21869 continue;
21870 case MODE_HI:
21871 ival = trunc_int_for_mode (ival, HImode);
21872 break;
21873 case MODE_SI:
21874 ival = trunc_int_for_mode (ival, SImode);
21875 break;
21876 default:
21877 break;
21878 }
21879 if (IN_RANGE (ival, -128, 127))
21880 {
21881 len = 1;
21882 continue;
21883 }
21884 }
21885 switch (mode)
21886 {
21887 case MODE_QI:
21888 len = 1;
21889 break;
21890 case MODE_HI:
21891 len = 2;
21892 break;
21893 case MODE_SI:
21894 len = 4;
21895 break;
21896 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
21897 case MODE_DI:
21898 len = 4;
21899 break;
21900 default:
21901 fatal_insn ("unknown insn mode", insn);
21902 }
21903 }
21904 return len;
21905 }
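
/* Worked examples (illustrative): for "addl $1000000, %eax" the SImode
   immediate does not fit in 8 bits, so the default length is 4 bytes;
   for "addl $3, %eax" with SHORTFORM set, the value lies in [-128, 127],
   so the imm8 alternative applies and the length is 1 byte.  */
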
21906 /* Compute default value for "length_address" attribute. */
21907 int
21908 ix86_attr_length_address_default (rtx insn)
21909 {
21910 int i;
21911
21912 if (get_attr_type (insn) == TYPE_LEA)
21913 {
21914 rtx set = PATTERN (insn), addr;
21915
21916 if (GET_CODE (set) == PARALLEL)
21917 set = XVECEXP (set, 0, 0);
21918
21919 gcc_assert (GET_CODE (set) == SET);
21920
21921 addr = SET_SRC (set);
21922 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21923 {
21924 if (GET_CODE (addr) == ZERO_EXTEND)
21925 addr = XEXP (addr, 0);
21926 if (GET_CODE (addr) == SUBREG)
21927 addr = SUBREG_REG (addr);
21928 }
21929
21930 return memory_address_length (addr);
21931 }
21932
21933 extract_insn_cached (insn);
21934 for (i = recog_data.n_operands - 1; i >= 0; --i)
21935 if (MEM_P (recog_data.operand[i]))
21936 {
21937 constrain_operands_cached (reload_completed);
21938 if (which_alternative != -1)
21939 {
21940 const char *constraints = recog_data.constraints[i];
21941 int alt = which_alternative;
21942
21943 while (*constraints == '=' || *constraints == '+')
21944 constraints++;
21945 while (alt-- > 0)
21946 while (*constraints++ != ',')
21947 ;
21948 /* Skip ignored operands. */
21949 if (*constraints == 'X')
21950 continue;
21951 }
21952 return memory_address_length (XEXP (recog_data.operand[i], 0));
21953 }
21954 return 0;
21955 }
21956
21957 /* Compute default value for "length_vex" attribute. It includes
21958 2 or 3 byte VEX prefix and 1 opcode byte. */
21959
21960 int
21961 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
21962 {
21963 int i;
21964
21965 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
21966 requires the 3-byte VEX prefix. */
21967 if (!has_0f_opcode || has_vex_w)
21968 return 3 + 1;
21969
21970 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
21971 if (!TARGET_64BIT)
21972 return 2 + 1;
21973
21974 extract_insn_cached (insn);
21975
21976 for (i = recog_data.n_operands - 1; i >= 0; --i)
21977 if (REG_P (recog_data.operand[i]))
21978 {
21979 /* REX.W bit uses 3 byte VEX prefix. */
21980 if (GET_MODE (recog_data.operand[i]) == DImode
21981 && GENERAL_REG_P (recog_data.operand[i]))
21982 return 3 + 1;
21983 }
21984 else
21985 {
21986 /* REX.X or REX.B bits use 3 byte VEX prefix. */
21987 if (MEM_P (recog_data.operand[i])
21988 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
21989 return 3 + 1;
21990 }
21991
21992 return 2 + 1;
21993 }
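/* A worked example (illustrative): vaddps %xmm1, %xmm2, %xmm0 is a 0f-map
   insn with no VEX.W, no DImode general registers and no extended
   registers in a memory operand, so the 2-byte VEX prefix applies and the
   value is 2 + 1 = 3 (prefix plus opcode byte).  The same insn with a
   memory operand whose base or index is one of %r8..%r15 needs VEX.X or
   VEX.B, hence the 3-byte prefix and a value of 3 + 1 = 4.  */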
21994 \f
21995 /* Return the maximum number of instructions a cpu can issue. */
21996
21997 static int
21998 ix86_issue_rate (void)
21999 {
22000 switch (ix86_tune)
22001 {
22002 case PROCESSOR_PENTIUM:
22003 case PROCESSOR_ATOM:
22004 case PROCESSOR_K6:
22005 return 2;
22006
22007 case PROCESSOR_PENTIUMPRO:
22008 case PROCESSOR_PENTIUM4:
22009 case PROCESSOR_CORE2_32:
22010 case PROCESSOR_CORE2_64:
22011 case PROCESSOR_COREI7_32:
22012 case PROCESSOR_COREI7_64:
22013 case PROCESSOR_ATHLON:
22014 case PROCESSOR_K8:
22015 case PROCESSOR_AMDFAM10:
22016 case PROCESSOR_NOCONA:
22017 case PROCESSOR_GENERIC32:
22018 case PROCESSOR_GENERIC64:
22019 case PROCESSOR_BDVER1:
22020 case PROCESSOR_BDVER2:
22021 case PROCESSOR_BTVER1:
22022 return 3;
22023
22024 default:
22025 return 1;
22026 }
22027 }
22028
22029 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
22030 by DEP_INSN and nothing set by DEP_INSN. */
22031
22032 static bool
22033 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22034 {
22035 rtx set, set2;
22036
22037 /* Simplify the test for uninteresting insns. */
22038 if (insn_type != TYPE_SETCC
22039 && insn_type != TYPE_ICMOV
22040 && insn_type != TYPE_FCMOV
22041 && insn_type != TYPE_IBR)
22042 return false;
22043
22044 if ((set = single_set (dep_insn)) != 0)
22045 {
22046 set = SET_DEST (set);
22047 set2 = NULL_RTX;
22048 }
22049 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22050 && XVECLEN (PATTERN (dep_insn), 0) == 2
22051 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22052 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22053 {
22054 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22055 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22056 }
22057 else
22058 return false;
22059
22060 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22061 return false;
22062
22063 /* This test is true if the dependent insn reads the flags but
22064 not any other potentially set register. */
22065 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22066 return false;
22067
22068 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22069 return false;
22070
22071 return true;
22072 }
22073
22074 /* Return true iff USE_INSN has a memory address with operands set by
22075 SET_INSN. */
22076
22077 bool
22078 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22079 {
22080 int i;
22081 extract_insn_cached (use_insn);
22082 for (i = recog_data.n_operands - 1; i >= 0; --i)
22083 if (MEM_P (recog_data.operand[i]))
22084 {
22085 rtx addr = XEXP (recog_data.operand[i], 0);
22086 return modified_in_p (addr, set_insn) != 0;
22087 }
22088 return false;
22089 }
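/* Illustrative example: with

     set_insn:  movl $buf, %eax
     use_insn:  movl (%eax), %edx

   the address of use_insn's memory operand is %eax, which set_insn
   modifies, so this function returns true; the Pentium case in
   ix86_adjust_cost below then charges the extra AGI cycle.  */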
22090
22091 static int
22092 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22093 {
22094 enum attr_type insn_type, dep_insn_type;
22095 enum attr_memory memory;
22096 rtx set, set2;
22097 int dep_insn_code_number;
22098
22099 /* Anti and output dependencies have zero cost on all CPUs. */
22100 if (REG_NOTE_KIND (link) != 0)
22101 return 0;
22102
22103 dep_insn_code_number = recog_memoized (dep_insn);
22104
22105 /* If we can't recognize the insns, we can't really do anything. */
22106 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22107 return cost;
22108
22109 insn_type = get_attr_type (insn);
22110 dep_insn_type = get_attr_type (dep_insn);
22111
22112 switch (ix86_tune)
22113 {
22114 case PROCESSOR_PENTIUM:
22115 /* Address Generation Interlock adds a cycle of latency. */
22116 if (insn_type == TYPE_LEA)
22117 {
22118 rtx addr = PATTERN (insn);
22119
22120 if (GET_CODE (addr) == PARALLEL)
22121 addr = XVECEXP (addr, 0, 0);
22122
22123 gcc_assert (GET_CODE (addr) == SET);
22124
22125 addr = SET_SRC (addr);
22126 if (modified_in_p (addr, dep_insn))
22127 cost += 1;
22128 }
22129 else if (ix86_agi_dependent (dep_insn, insn))
22130 cost += 1;
22131
22132 /* ??? Compares pair with jump/setcc. */
22133 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22134 cost = 0;
22135
22136 /* Floating point stores require the value to be ready one cycle earlier. */
22137 if (insn_type == TYPE_FMOV
22138 && get_attr_memory (insn) == MEMORY_STORE
22139 && !ix86_agi_dependent (dep_insn, insn))
22140 cost += 1;
22141 break;
22142
22143 case PROCESSOR_PENTIUMPRO:
22144 memory = get_attr_memory (insn);
22145
22146 /* INT->FP conversion is expensive. */
22147 if (get_attr_fp_int_src (dep_insn))
22148 cost += 5;
22149
22150 /* There is one cycle extra latency between an FP op and a store. */
22151 if (insn_type == TYPE_FMOV
22152 && (set = single_set (dep_insn)) != NULL_RTX
22153 && (set2 = single_set (insn)) != NULL_RTX
22154 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22155 && MEM_P (SET_DEST (set2)))
22156 cost += 1;
22157
22158 /* Show the ability of the reorder buffer to hide the latency of a load
22159 by executing it in parallel with the previous instruction when the
22160 previous instruction is not needed to compute the address. */
22161 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22162 && !ix86_agi_dependent (dep_insn, insn))
22163 {
22164 /* Claim moves take one cycle, as the core can issue one load
22165 at a time and the next load can start a cycle later. */
22166 if (dep_insn_type == TYPE_IMOV
22167 || dep_insn_type == TYPE_FMOV)
22168 cost = 1;
22169 else if (cost > 1)
22170 cost--;
22171 }
22172 break;
22173
22174 case PROCESSOR_K6:
22175 memory = get_attr_memory (insn);
22176
22177 /* The esp dependency is resolved before the instruction is really
22178 finished. */
22179 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22180 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22181 return 1;
22182
22183 /* INT->FP conversion is expensive. */
22184 if (get_attr_fp_int_src (dep_insn))
22185 cost += 5;
22186
22187 /* Show the ability of the reorder buffer to hide the latency of a load
22188 by executing it in parallel with the previous instruction when the
22189 previous instruction is not needed to compute the address. */
22190 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22191 && !ix86_agi_dependent (dep_insn, insn))
22192 {
22193 /* Claim moves take one cycle, as the core can issue one load
22194 at a time and the next load can start a cycle later. */
22195 if (dep_insn_type == TYPE_IMOV
22196 || dep_insn_type == TYPE_FMOV)
22197 cost = 1;
22198 else if (cost > 2)
22199 cost -= 2;
22200 else
22201 cost = 1;
22202 }
22203 break;
22204
22205 case PROCESSOR_ATHLON:
22206 case PROCESSOR_K8:
22207 case PROCESSOR_AMDFAM10:
22208 case PROCESSOR_BDVER1:
22209 case PROCESSOR_BDVER2:
22210 case PROCESSOR_BTVER1:
22211 case PROCESSOR_ATOM:
22212 case PROCESSOR_GENERIC32:
22213 case PROCESSOR_GENERIC64:
22214 memory = get_attr_memory (insn);
22215
22216 /* Show the ability of the reorder buffer to hide the latency of a load
22217 by executing it in parallel with the previous instruction when the
22218 previous instruction is not needed to compute the address. */
22219 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22220 && !ix86_agi_dependent (dep_insn, insn))
22221 {
22222 enum attr_unit unit = get_attr_unit (insn);
22223 int loadcost = 3;
22224
22225 /* Because the integer and floating-point unit pipelines differ in the
22226 length of their preparation stages, memory operands are cheaper for
22227 floating point.
22228
22229 ??? For Athlon the difference is most probably 2. */
22230 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22231 loadcost = 3;
22232 else
22233 loadcost = TARGET_ATHLON ? 2 : 0;
22234
22235 if (cost >= loadcost)
22236 cost -= loadcost;
22237 else
22238 cost = 0;
22239 }
22240
22241 default:
22242 break;
22243 }
22244
22245 return cost;
22246 }
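/* A worked example of the load-latency adjustment above (illustrative,
   assuming -mtune=k8): if DEP_INSN produces a value that an
   "addl mem, %reg" style INSN consumes through its register operand
   rather than through the address of the memory operand, the integer
   loadcost of 3 is subtracted from the DFA-provided cost, modelling that
   the load half of INSN can start while DEP_INSN is still in flight.  A
   cost of 3 therefore collapses to 0, while a cost of 5 becomes 2.  */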
22247
22248 /* How many alternative schedules to try. This should be as wide as the
22249 scheduling freedom in the DFA, but no wider. Making this value too
22250 large results in extra work for the scheduler. */
22251
22252 static int
22253 ia32_multipass_dfa_lookahead (void)
22254 {
22255 switch (ix86_tune)
22256 {
22257 case PROCESSOR_PENTIUM:
22258 return 2;
22259
22260 case PROCESSOR_PENTIUMPRO:
22261 case PROCESSOR_K6:
22262 return 1;
22263
22264 case PROCESSOR_CORE2_32:
22265 case PROCESSOR_CORE2_64:
22266 case PROCESSOR_COREI7_32:
22267 case PROCESSOR_COREI7_64:
22268 /* Generally, we want haifa-sched:max_issue() to look ahead as far as
22269 the number of instructions that can be executed in one cycle, i.e.,
22270 issue_rate. I wonder why tuning for many CPUs does not do this. */
22271 return ix86_issue_rate ();
22272
22273 default:
22274 return 0;
22275 }
22276 }
22277
22278 \f
22279
22280 /* Model decoder of Core 2/i7.
22281 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22282 track the instruction fetch block boundaries and make sure that long
22283 (9+ bytes) instructions are assigned to D0. */
22284
22285 /* Maximum length of an insn that can be handled by
22286 a secondary decoder unit. '8' for Core 2/i7. */
22287 static int core2i7_secondary_decoder_max_insn_size;
22288
22289 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22290 '16' for Core 2/i7. */
22291 static int core2i7_ifetch_block_size;
22292
22293 /* Maximum number of instructions decoder can handle per cycle.
22294 '6' for Core 2/i7. */
22295 static int core2i7_ifetch_block_max_insns;
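
/* For example (illustrative), with the Core 2/i7 parameters set below in
   ix86_sched_init_global -- a 16-byte fetch block, at most 6 insns per
   cycle and an 8-byte limit for the secondary decoders -- a ready list of
   insns of 7, 6 and 5 bytes can place only the first two in one cycle:
   7 + 6 = 13 bytes fit, but adding the 5-byte insn would exceed 16 bytes,
   so the filter below masks it out until the cycle advances.  */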
22296
22297 typedef struct ix86_first_cycle_multipass_data_ *
22298 ix86_first_cycle_multipass_data_t;
22299 typedef const struct ix86_first_cycle_multipass_data_ *
22300 const_ix86_first_cycle_multipass_data_t;
22301
22302 /* A variable to store target state across calls to max_issue within
22303 one cycle. */
22304 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22305 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22306
22307 /* Initialize DATA. */
22308 static void
22309 core2i7_first_cycle_multipass_init (void *_data)
22310 {
22311 ix86_first_cycle_multipass_data_t data
22312 = (ix86_first_cycle_multipass_data_t) _data;
22313
22314 data->ifetch_block_len = 0;
22315 data->ifetch_block_n_insns = 0;
22316 data->ready_try_change = NULL;
22317 data->ready_try_change_size = 0;
22318 }
22319
22320 /* Advancing the cycle; reset ifetch block counts. */
22321 static void
22322 core2i7_dfa_post_advance_cycle (void)
22323 {
22324 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22325
22326 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22327
22328 data->ifetch_block_len = 0;
22329 data->ifetch_block_n_insns = 0;
22330 }
22331
22332 static int min_insn_size (rtx);
22333
22334 /* Filter out insns from ready_try that the core will not be able to issue
22335 on current cycle due to decoder. */
22336 static void
22337 core2i7_first_cycle_multipass_filter_ready_try
22338 (const_ix86_first_cycle_multipass_data_t data,
22339 char *ready_try, int n_ready, bool first_cycle_insn_p)
22340 {
22341 while (n_ready--)
22342 {
22343 rtx insn;
22344 int insn_size;
22345
22346 if (ready_try[n_ready])
22347 continue;
22348
22349 insn = get_ready_element (n_ready);
22350 insn_size = min_insn_size (insn);
22351
22352 if (/* If this insn is too long for a secondary decoder ... */
22353 (!first_cycle_insn_p
22354 && insn_size > core2i7_secondary_decoder_max_insn_size)
22355 /* ... or it would not fit into the ifetch block ... */
22356 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22357 /* ... or the decoder is full already ... */
22358 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22359 /* ... mask the insn out. */
22360 {
22361 ready_try[n_ready] = 1;
22362
22363 if (data->ready_try_change)
22364 SET_BIT (data->ready_try_change, n_ready);
22365 }
22366 }
22367 }
22368
22369 /* Prepare for a new round of multipass lookahead scheduling. */
22370 static void
22371 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22372 bool first_cycle_insn_p)
22373 {
22374 ix86_first_cycle_multipass_data_t data
22375 = (ix86_first_cycle_multipass_data_t) _data;
22376 const_ix86_first_cycle_multipass_data_t prev_data
22377 = ix86_first_cycle_multipass_data;
22378
22379 /* Restore the state from the end of the previous round. */
22380 data->ifetch_block_len = prev_data->ifetch_block_len;
22381 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22382
22383 /* Filter instructions that cannot be issued on current cycle due to
22384 decoder restrictions. */
22385 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22386 first_cycle_insn_p);
22387 }
22388
22389 /* INSN is being issued in current solution. Account for its impact on
22390 the decoder model. */
22391 static void
22392 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22393 rtx insn, const void *_prev_data)
22394 {
22395 ix86_first_cycle_multipass_data_t data
22396 = (ix86_first_cycle_multipass_data_t) _data;
22397 const_ix86_first_cycle_multipass_data_t prev_data
22398 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22399
22400 int insn_size = min_insn_size (insn);
22401
22402 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22403 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22404 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22405 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22406
22407 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22408 if (!data->ready_try_change)
22409 {
22410 data->ready_try_change = sbitmap_alloc (n_ready);
22411 data->ready_try_change_size = n_ready;
22412 }
22413 else if (data->ready_try_change_size < n_ready)
22414 {
22415 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22416 n_ready, 0);
22417 data->ready_try_change_size = n_ready;
22418 }
22419 sbitmap_zero (data->ready_try_change);
22420
22421 /* Filter out insns from ready_try that the core will not be able to issue
22422 on current cycle due to decoder. */
22423 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22424 false);
22425 }
22426
22427 /* Revert the effect on ready_try. */
22428 static void
22429 core2i7_first_cycle_multipass_backtrack (const void *_data,
22430 char *ready_try,
22431 int n_ready ATTRIBUTE_UNUSED)
22432 {
22433 const_ix86_first_cycle_multipass_data_t data
22434 = (const_ix86_first_cycle_multipass_data_t) _data;
22435 unsigned int i = 0;
22436 sbitmap_iterator sbi;
22437
22438 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22439 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22440 {
22441 ready_try[i] = 0;
22442 }
22443 }
22444
22445 /* Save the result of multipass lookahead scheduling for the next round. */
22446 static void
22447 core2i7_first_cycle_multipass_end (const void *_data)
22448 {
22449 const_ix86_first_cycle_multipass_data_t data
22450 = (const_ix86_first_cycle_multipass_data_t) _data;
22451 ix86_first_cycle_multipass_data_t next_data
22452 = ix86_first_cycle_multipass_data;
22453
22454 if (data != NULL)
22455 {
22456 next_data->ifetch_block_len = data->ifetch_block_len;
22457 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22458 }
22459 }
22460
22461 /* Deallocate target data. */
22462 static void
22463 core2i7_first_cycle_multipass_fini (void *_data)
22464 {
22465 ix86_first_cycle_multipass_data_t data
22466 = (ix86_first_cycle_multipass_data_t) _data;
22467
22468 if (data->ready_try_change)
22469 {
22470 sbitmap_free (data->ready_try_change);
22471 data->ready_try_change = NULL;
22472 data->ready_try_change_size = 0;
22473 }
22474 }
22475
22476 /* Prepare for scheduling pass. */
22477 static void
22478 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22479 int verbose ATTRIBUTE_UNUSED,
22480 int max_uid ATTRIBUTE_UNUSED)
22481 {
22482 /* Install scheduling hooks for current CPU. Some of these hooks are used
22483 in time-critical parts of the scheduler, so we only set them up when
22484 they are actually used. */
22485 switch (ix86_tune)
22486 {
22487 case PROCESSOR_CORE2_32:
22488 case PROCESSOR_CORE2_64:
22489 case PROCESSOR_COREI7_32:
22490 case PROCESSOR_COREI7_64:
22491 targetm.sched.dfa_post_advance_cycle
22492 = core2i7_dfa_post_advance_cycle;
22493 targetm.sched.first_cycle_multipass_init
22494 = core2i7_first_cycle_multipass_init;
22495 targetm.sched.first_cycle_multipass_begin
22496 = core2i7_first_cycle_multipass_begin;
22497 targetm.sched.first_cycle_multipass_issue
22498 = core2i7_first_cycle_multipass_issue;
22499 targetm.sched.first_cycle_multipass_backtrack
22500 = core2i7_first_cycle_multipass_backtrack;
22501 targetm.sched.first_cycle_multipass_end
22502 = core2i7_first_cycle_multipass_end;
22503 targetm.sched.first_cycle_multipass_fini
22504 = core2i7_first_cycle_multipass_fini;
22505
22506 /* Set decoder parameters. */
22507 core2i7_secondary_decoder_max_insn_size = 8;
22508 core2i7_ifetch_block_size = 16;
22509 core2i7_ifetch_block_max_insns = 6;
22510 break;
22511
22512 default:
22513 targetm.sched.dfa_post_advance_cycle = NULL;
22514 targetm.sched.first_cycle_multipass_init = NULL;
22515 targetm.sched.first_cycle_multipass_begin = NULL;
22516 targetm.sched.first_cycle_multipass_issue = NULL;
22517 targetm.sched.first_cycle_multipass_backtrack = NULL;
22518 targetm.sched.first_cycle_multipass_end = NULL;
22519 targetm.sched.first_cycle_multipass_fini = NULL;
22520 break;
22521 }
22522 }
22523
22524 \f
22525 /* Compute the alignment given to a constant that is being placed in memory.
22526 EXP is the constant and ALIGN is the alignment that the object would
22527 ordinarily have.
22528 The value of this function is used instead of that alignment to align
22529 the object. */
22530
22531 int
22532 ix86_constant_alignment (tree exp, int align)
22533 {
22534 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22535 || TREE_CODE (exp) == INTEGER_CST)
22536 {
22537 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22538 return 64;
22539 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22540 return 128;
22541 }
22542 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22543 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22544 return BITS_PER_WORD;
22545
22546 return align;
22547 }
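/* Illustrative examples: a DFmode constant such as 3.14 that would
   otherwise get 32-bit alignment is bumped to 64 bits; an XFmode or
   128-bit vector constant is bumped to 128 bits; and, when not optimizing
   for size, a string constant of 31 or more characters is aligned to
   BITS_PER_WORD.  */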
22548
22549 /* Compute the alignment for a static variable.
22550 TYPE is the data type, and ALIGN is the alignment that
22551 the object would ordinarily have. The value of this function is used
22552 instead of that alignment to align the object. */
22553
22554 int
22555 ix86_data_alignment (tree type, int align)
22556 {
22557 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22558
22559 if (AGGREGATE_TYPE_P (type)
22560 && TYPE_SIZE (type)
22561 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22562 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22563 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22564 && align < max_align)
22565 align = max_align;
22566
22567 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
22568 to a 16-byte boundary. */
22569 if (TARGET_64BIT)
22570 {
22571 if (AGGREGATE_TYPE_P (type)
22572 && TYPE_SIZE (type)
22573 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22574 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22575 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22576 return 128;
22577 }
22578
22579 if (TREE_CODE (type) == ARRAY_TYPE)
22580 {
22581 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22582 return 64;
22583 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22584 return 128;
22585 }
22586 else if (TREE_CODE (type) == COMPLEX_TYPE)
22587 {
22588
22589 if (TYPE_MODE (type) == DCmode && align < 64)
22590 return 64;
22591 if ((TYPE_MODE (type) == XCmode
22592 || TYPE_MODE (type) == TCmode) && align < 128)
22593 return 128;
22594 }
22595 else if ((TREE_CODE (type) == RECORD_TYPE
22596 || TREE_CODE (type) == UNION_TYPE
22597 || TREE_CODE (type) == QUAL_UNION_TYPE)
22598 && TYPE_FIELDS (type))
22599 {
22600 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22601 return 64;
22602 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22603 return 128;
22604 }
22605 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22606 || TREE_CODE (type) == INTEGER_TYPE)
22607 {
22608 if (TYPE_MODE (type) == DFmode && align < 64)
22609 return 64;
22610 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22611 return 128;
22612 }
22613
22614 return align;
22615 }
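/* Illustrative examples: with max_align typically 256 bits, a static
   "int a[64]" (2048 bits) is raised to 256-bit alignment when not
   optimizing for size; on x86-64 any aggregate of 16 bytes or more gets
   at least 128-bit alignment per the ABI rule above; and an array of
   doubles that would otherwise be under-aligned is raised to 64 bits.  */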
22616
22617 /* Compute the alignment for a local variable or a stack slot. EXP is
22618 the data type or decl itself, MODE is the widest mode available and
22619 ALIGN is the alignment that the object would ordinarily have. The
22620 value of this macro is used instead of that alignment to align the
22621 object. */
22622
22623 unsigned int
22624 ix86_local_alignment (tree exp, enum machine_mode mode,
22625 unsigned int align)
22626 {
22627 tree type, decl;
22628
22629 if (exp && DECL_P (exp))
22630 {
22631 type = TREE_TYPE (exp);
22632 decl = exp;
22633 }
22634 else
22635 {
22636 type = exp;
22637 decl = NULL;
22638 }
22639
22640 /* Don't do dynamic stack realignment for long long objects with
22641 -mpreferred-stack-boundary=2. */
22642 if (!TARGET_64BIT
22643 && align == 64
22644 && ix86_preferred_stack_boundary < 64
22645 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22646 && (!type || !TYPE_USER_ALIGN (type))
22647 && (!decl || !DECL_USER_ALIGN (decl)))
22648 align = 32;
22649
22650 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
22651 register in MODE. For an XFmode slot we return at least the
22652 DFmode alignment. */
22653 if (!type)
22654 {
22655 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22656 align = GET_MODE_ALIGNMENT (DFmode);
22657 return align;
22658 }
22659
22660 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
22661 to a 16-byte boundary. The exact wording is:
22662
22663 An array uses the same alignment as its elements, except that a local or
22664 global array variable of length at least 16 bytes or
22665 a C99 variable-length array variable always has alignment of at least 16 bytes.
22666
22667 This was added to allow use of aligned SSE instructions on arrays. The
22668 rule is meant for static storage (where the compiler cannot do the
22669 analysis by itself); we follow it for automatic variables only when it
22670 is convenient, since we fully control everything in the function being
22671 compiled and functions from other units cannot rely on the alignment.
22672
22673 Exclude the va_list type. It is the common case of a local array where
22674 we cannot benefit from the alignment. */
22675 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22676 && TARGET_SSE)
22677 {
22678 if (AGGREGATE_TYPE_P (type)
22679 && (va_list_type_node == NULL_TREE
22680 || (TYPE_MAIN_VARIANT (type)
22681 != TYPE_MAIN_VARIANT (va_list_type_node)))
22682 && TYPE_SIZE (type)
22683 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22684 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
22685 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22686 return 128;
22687 }
22688 if (TREE_CODE (type) == ARRAY_TYPE)
22689 {
22690 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22691 return 64;
22692 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22693 return 128;
22694 }
22695 else if (TREE_CODE (type) == COMPLEX_TYPE)
22696 {
22697 if (TYPE_MODE (type) == DCmode && align < 64)
22698 return 64;
22699 if ((TYPE_MODE (type) == XCmode
22700 || TYPE_MODE (type) == TCmode) && align < 128)
22701 return 128;
22702 }
22703 else if ((TREE_CODE (type) == RECORD_TYPE
22704 || TREE_CODE (type) == UNION_TYPE
22705 || TREE_CODE (type) == QUAL_UNION_TYPE)
22706 && TYPE_FIELDS (type))
22707 {
22708 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22709 return 64;
22710 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22711 return 128;
22712 }
22713 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22714 || TREE_CODE (type) == INTEGER_TYPE)
22715 {
22716
22717 if (TYPE_MODE (type) == DFmode && align < 64)
22718 return 64;
22719 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22720 return 128;
22721 }
22722 return align;
22723 }
22724
22725 /* Compute the minimum required alignment for dynamic stack realignment
22726 purposes for a local variable, parameter or a stack slot. EXP is
22727 the data type or decl itself, MODE is its mode and ALIGN is the
22728 alignment that the object would ordinarily have. */
22729
22730 unsigned int
22731 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22732 unsigned int align)
22733 {
22734 tree type, decl;
22735
22736 if (exp && DECL_P (exp))
22737 {
22738 type = TREE_TYPE (exp);
22739 decl = exp;
22740 }
22741 else
22742 {
22743 type = exp;
22744 decl = NULL;
22745 }
22746
22747 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22748 return align;
22749
22750 /* Don't do dynamic stack realignment for long long objects with
22751 -mpreferred-stack-boundary=2. */
22752 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22753 && (!type || !TYPE_USER_ALIGN (type))
22754 && (!decl || !DECL_USER_ALIGN (decl)))
22755 return 32;
22756
22757 return align;
22758 }
22759 \f
22760 /* Find a location for the static chain incoming to a nested function.
22761 This is a register, unless all free registers are used by arguments. */
22762
22763 static rtx
22764 ix86_static_chain (const_tree fndecl, bool incoming_p)
22765 {
22766 unsigned regno;
22767
22768 if (!DECL_STATIC_CHAIN (fndecl))
22769 return NULL;
22770
22771 if (TARGET_64BIT)
22772 {
22773 /* We always use R10 in 64-bit mode. */
22774 regno = R10_REG;
22775 }
22776 else
22777 {
22778 tree fntype;
22779 unsigned int ccvt;
22780
22781 /* By default in 32-bit mode we use ECX to pass the static chain. */
22782 regno = CX_REG;
22783
22784 fntype = TREE_TYPE (fndecl);
22785 ccvt = ix86_get_callcvt (fntype);
22786 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
22787 {
22788 /* Fastcall functions use ecx/edx for arguments, which leaves
22789 us with EAX for the static chain.
22790 Thiscall functions use ecx for arguments, which also
22791 leaves us with EAX for the static chain. */
22792 regno = AX_REG;
22793 }
22794 else if (ix86_function_regparm (fntype, fndecl) == 3)
22795 {
22796 /* For regparm 3, we have no free call-clobbered registers in
22797 which to store the static chain. In order to implement this,
22798 we have the trampoline push the static chain to the stack.
22799 However, we can't push a value below the return address when
22800 we call the nested function directly, so we have to use an
22801 alternate entry point. For this we use ESI, and have the
22802 alternate entry point push ESI, so that things appear the
22803 same once we're executing the nested function. */
22804 if (incoming_p)
22805 {
22806 if (fndecl == current_function_decl)
22807 ix86_static_chain_on_stack = true;
22808 return gen_frame_mem (SImode,
22809 plus_constant (arg_pointer_rtx, -8));
22810 }
22811 regno = SI_REG;
22812 }
22813 }
22814
22815 return gen_rtx_REG (Pmode, regno);
22816 }
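/* Illustrative summary of the choices above: a 64-bit nested function
   receives its static chain in %r10; a plain 32-bit nested function uses
   %ecx; fastcall and thiscall nested functions use %eax; and with
   regparm(3) no register is free, so the incoming chain lives in the
   stack slot at arg_pointer - 8 while %esi and the alternate entry point
   are used on the outgoing side.  */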
22817
22818 /* Emit RTL insns to initialize the variable parts of a trampoline.
22819 FNDECL is the decl of the target address; M_TRAMP is a MEM for
22820 the trampoline, and CHAIN_VALUE is an RTX for the static chain
22821 to be passed to the target function. */
22822
22823 static void
22824 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
22825 {
22826 rtx mem, fnaddr;
22827 int opcode;
22828 int offset = 0;
22829
22830 fnaddr = XEXP (DECL_RTL (fndecl), 0);
22831
22832 if (TARGET_64BIT)
22833 {
22834 int size;
22835
22836 /* Load the function address into r11. Try to load the address using
22837 the shorter movl instead of movabs. We may want to support
22838 movq for kernel mode, but the kernel does not use trampolines at
22839 the moment. */
22840 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
22841 {
22842 fnaddr = copy_to_mode_reg (DImode, fnaddr);
22843
22844 mem = adjust_address (m_tramp, HImode, offset);
22845 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
22846
22847 mem = adjust_address (m_tramp, SImode, offset + 2);
22848 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
22849 offset += 6;
22850 }
22851 else
22852 {
22853 mem = adjust_address (m_tramp, HImode, offset);
22854 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
22855
22856 mem = adjust_address (m_tramp, DImode, offset + 2);
22857 emit_move_insn (mem, fnaddr);
22858 offset += 10;
22859 }
22860
22861 /* Load the static chain into r10 using movabs. Use the
22862 shorter movl instead of movabs for x32. */
22863 if (TARGET_X32)
22864 {
22865 opcode = 0xba41;
22866 size = 6;
22867 }
22868 else
22869 {
22870 opcode = 0xba49;
22871 size = 10;
22872 }
22873
22874 mem = adjust_address (m_tramp, HImode, offset);
22875 emit_move_insn (mem, gen_int_mode (opcode, HImode));
22876
22877 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
22878 emit_move_insn (mem, chain_value);
22879 offset += size;
22880
22881 /* Jump to r11; the last (unused) byte is a nop, only there to
22882 pad the write out to a single 32-bit store. */
22883 mem = adjust_address (m_tramp, SImode, offset);
22884 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
22885 offset += 4;
22886 }
22887 else
22888 {
22889 rtx disp, chain;
22890
22891 /* Depending on the static chain location, either load a register
22892 with a constant, or push the constant to the stack. All of the
22893 instructions are the same size. */
22894 chain = ix86_static_chain (fndecl, true);
22895 if (REG_P (chain))
22896 {
22897 switch (REGNO (chain))
22898 {
22899 case AX_REG:
22900 opcode = 0xb8; break;
22901 case CX_REG:
22902 opcode = 0xb9; break;
22903 default:
22904 gcc_unreachable ();
22905 }
22906 }
22907 else
22908 opcode = 0x68;
22909
22910 mem = adjust_address (m_tramp, QImode, offset);
22911 emit_move_insn (mem, gen_int_mode (opcode, QImode));
22912
22913 mem = adjust_address (m_tramp, SImode, offset + 1);
22914 emit_move_insn (mem, chain_value);
22915 offset += 5;
22916
22917 mem = adjust_address (m_tramp, QImode, offset);
22918 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
22919
22920 mem = adjust_address (m_tramp, SImode, offset + 1);
22921
22922 /* Compute the offset from the end of the jmp to the target function.
22923 In the case in which the trampoline stores the static chain on
22924 the stack, we need to skip the first insn, which pushes the
22925 (call-saved) static chain register; this push is 1 byte. */
22926 offset += 5;
22927 disp = expand_binop (SImode, sub_optab, fnaddr,
22928 plus_constant (XEXP (m_tramp, 0),
22929 offset - (MEM_P (chain) ? 1 : 0)),
22930 NULL_RTX, 1, OPTAB_DIRECT);
22931 emit_move_insn (mem, disp);
22932 }
22933
22934 gcc_assert (offset <= TRAMPOLINE_SIZE);
22935
22936 #ifdef HAVE_ENABLE_EXECUTE_STACK
22937 #ifdef CHECK_EXECUTE_STACK_ENABLED
22938 if (CHECK_EXECUTE_STACK_ENABLED)
22939 #endif
22940 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
22941 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
22942 #endif
22943 }
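/* The resulting byte layout (illustrative; ia32 case with the chain in
   %ecx, followed by the long 64-bit form):

     b9 xx xx xx xx                   movl   $chain_value, %ecx
     e9 xx xx xx xx                   jmp    fnaddr   (rel32 from end of jmp)

     49 bb xx xx xx xx xx xx xx xx    movabs $fnaddr, %r11
     49 ba xx xx xx xx xx xx xx xx    movabs $chain_value, %r10
     49 ff e3 90                      jmp *%r11 ; nop (pads the last store)

   i.e. 10 bytes for ia32 and 24 bytes for the 64-bit form shown, which
   stays within TRAMPOLINE_SIZE.  */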
22944 \f
22945 /* The following file contains several enumerations and data structures
22946 built from the definitions in i386-builtin-types.def. */
22947
22948 #include "i386-builtin-types.inc"
22949
22950 /* Table for the ix86 builtin non-function types. */
22951 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
22952
22953 /* Retrieve an element from the above table, building some of
22954 the types lazily. */
22955
22956 static tree
22957 ix86_get_builtin_type (enum ix86_builtin_type tcode)
22958 {
22959 unsigned int index;
22960 tree type, itype;
22961
22962 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
22963
22964 type = ix86_builtin_type_tab[(int) tcode];
22965 if (type != NULL)
22966 return type;
22967
22968 gcc_assert (tcode > IX86_BT_LAST_PRIM);
22969 if (tcode <= IX86_BT_LAST_VECT)
22970 {
22971 enum machine_mode mode;
22972
22973 index = tcode - IX86_BT_LAST_PRIM - 1;
22974 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
22975 mode = ix86_builtin_type_vect_mode[index];
22976
22977 type = build_vector_type_for_mode (itype, mode);
22978 }
22979 else
22980 {
22981 int quals;
22982
22983 index = tcode - IX86_BT_LAST_VECT - 1;
22984 if (tcode <= IX86_BT_LAST_PTR)
22985 quals = TYPE_UNQUALIFIED;
22986 else
22987 quals = TYPE_QUAL_CONST;
22988
22989 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
22990 if (quals != TYPE_UNQUALIFIED)
22991 itype = build_qualified_type (itype, quals);
22992
22993 type = build_pointer_type (itype);
22994 }
22995
22996 ix86_builtin_type_tab[(int) tcode] = type;
22997 return type;
22998 }
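/* For example (illustrative), the first request for one of the vector
   codes that i386-builtin-types.def generates (such as the V4SF vector of
   FLOAT) takes the vector branch above: the element type is obtained
   recursively and build_vector_type_for_mode (float_type_node, V4SFmode)
   is stored in ix86_builtin_type_tab, so later requests return the
   cached tree.  */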
22999
23000 /* Table for the ix86 builtin function types. */
23001 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23002
23003 /* Retrieve an element from the above table, building some of
23004 the types lazily. */
23005
23006 static tree
23007 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23008 {
23009 tree type;
23010
23011 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23012
23013 type = ix86_builtin_func_type_tab[(int) tcode];
23014 if (type != NULL)
23015 return type;
23016
23017 if (tcode <= IX86_BT_LAST_FUNC)
23018 {
23019 unsigned start = ix86_builtin_func_start[(int) tcode];
23020 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23021 tree rtype, atype, args = void_list_node;
23022 unsigned i;
23023
23024 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23025 for (i = after - 1; i > start; --i)
23026 {
23027 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23028 args = tree_cons (NULL, atype, args);
23029 }
23030
23031 type = build_function_type (rtype, args);
23032 }
23033 else
23034 {
23035 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23036 enum ix86_builtin_func_type icode;
23037
23038 icode = ix86_builtin_func_alias_base[index];
23039 type = ix86_get_builtin_func_type (icode);
23040 }
23041
23042 ix86_builtin_func_type_tab[(int) tcode] = type;
23043 return type;
23044 }
23045
23046
23047 /* Codes for all the SSE/MMX builtins. */
23048 enum ix86_builtins
23049 {
23050 IX86_BUILTIN_ADDPS,
23051 IX86_BUILTIN_ADDSS,
23052 IX86_BUILTIN_DIVPS,
23053 IX86_BUILTIN_DIVSS,
23054 IX86_BUILTIN_MULPS,
23055 IX86_BUILTIN_MULSS,
23056 IX86_BUILTIN_SUBPS,
23057 IX86_BUILTIN_SUBSS,
23058
23059 IX86_BUILTIN_CMPEQPS,
23060 IX86_BUILTIN_CMPLTPS,
23061 IX86_BUILTIN_CMPLEPS,
23062 IX86_BUILTIN_CMPGTPS,
23063 IX86_BUILTIN_CMPGEPS,
23064 IX86_BUILTIN_CMPNEQPS,
23065 IX86_BUILTIN_CMPNLTPS,
23066 IX86_BUILTIN_CMPNLEPS,
23067 IX86_BUILTIN_CMPNGTPS,
23068 IX86_BUILTIN_CMPNGEPS,
23069 IX86_BUILTIN_CMPORDPS,
23070 IX86_BUILTIN_CMPUNORDPS,
23071 IX86_BUILTIN_CMPEQSS,
23072 IX86_BUILTIN_CMPLTSS,
23073 IX86_BUILTIN_CMPLESS,
23074 IX86_BUILTIN_CMPNEQSS,
23075 IX86_BUILTIN_CMPNLTSS,
23076 IX86_BUILTIN_CMPNLESS,
23077 IX86_BUILTIN_CMPNGTSS,
23078 IX86_BUILTIN_CMPNGESS,
23079 IX86_BUILTIN_CMPORDSS,
23080 IX86_BUILTIN_CMPUNORDSS,
23081
23082 IX86_BUILTIN_COMIEQSS,
23083 IX86_BUILTIN_COMILTSS,
23084 IX86_BUILTIN_COMILESS,
23085 IX86_BUILTIN_COMIGTSS,
23086 IX86_BUILTIN_COMIGESS,
23087 IX86_BUILTIN_COMINEQSS,
23088 IX86_BUILTIN_UCOMIEQSS,
23089 IX86_BUILTIN_UCOMILTSS,
23090 IX86_BUILTIN_UCOMILESS,
23091 IX86_BUILTIN_UCOMIGTSS,
23092 IX86_BUILTIN_UCOMIGESS,
23093 IX86_BUILTIN_UCOMINEQSS,
23094
23095 IX86_BUILTIN_CVTPI2PS,
23096 IX86_BUILTIN_CVTPS2PI,
23097 IX86_BUILTIN_CVTSI2SS,
23098 IX86_BUILTIN_CVTSI642SS,
23099 IX86_BUILTIN_CVTSS2SI,
23100 IX86_BUILTIN_CVTSS2SI64,
23101 IX86_BUILTIN_CVTTPS2PI,
23102 IX86_BUILTIN_CVTTSS2SI,
23103 IX86_BUILTIN_CVTTSS2SI64,
23104
23105 IX86_BUILTIN_MAXPS,
23106 IX86_BUILTIN_MAXSS,
23107 IX86_BUILTIN_MINPS,
23108 IX86_BUILTIN_MINSS,
23109
23110 IX86_BUILTIN_LOADUPS,
23111 IX86_BUILTIN_STOREUPS,
23112 IX86_BUILTIN_MOVSS,
23113
23114 IX86_BUILTIN_MOVHLPS,
23115 IX86_BUILTIN_MOVLHPS,
23116 IX86_BUILTIN_LOADHPS,
23117 IX86_BUILTIN_LOADLPS,
23118 IX86_BUILTIN_STOREHPS,
23119 IX86_BUILTIN_STORELPS,
23120
23121 IX86_BUILTIN_MASKMOVQ,
23122 IX86_BUILTIN_MOVMSKPS,
23123 IX86_BUILTIN_PMOVMSKB,
23124
23125 IX86_BUILTIN_MOVNTPS,
23126 IX86_BUILTIN_MOVNTQ,
23127
23128 IX86_BUILTIN_LOADDQU,
23129 IX86_BUILTIN_STOREDQU,
23130
23131 IX86_BUILTIN_PACKSSWB,
23132 IX86_BUILTIN_PACKSSDW,
23133 IX86_BUILTIN_PACKUSWB,
23134
23135 IX86_BUILTIN_PADDB,
23136 IX86_BUILTIN_PADDW,
23137 IX86_BUILTIN_PADDD,
23138 IX86_BUILTIN_PADDQ,
23139 IX86_BUILTIN_PADDSB,
23140 IX86_BUILTIN_PADDSW,
23141 IX86_BUILTIN_PADDUSB,
23142 IX86_BUILTIN_PADDUSW,
23143 IX86_BUILTIN_PSUBB,
23144 IX86_BUILTIN_PSUBW,
23145 IX86_BUILTIN_PSUBD,
23146 IX86_BUILTIN_PSUBQ,
23147 IX86_BUILTIN_PSUBSB,
23148 IX86_BUILTIN_PSUBSW,
23149 IX86_BUILTIN_PSUBUSB,
23150 IX86_BUILTIN_PSUBUSW,
23151
23152 IX86_BUILTIN_PAND,
23153 IX86_BUILTIN_PANDN,
23154 IX86_BUILTIN_POR,
23155 IX86_BUILTIN_PXOR,
23156
23157 IX86_BUILTIN_PAVGB,
23158 IX86_BUILTIN_PAVGW,
23159
23160 IX86_BUILTIN_PCMPEQB,
23161 IX86_BUILTIN_PCMPEQW,
23162 IX86_BUILTIN_PCMPEQD,
23163 IX86_BUILTIN_PCMPGTB,
23164 IX86_BUILTIN_PCMPGTW,
23165 IX86_BUILTIN_PCMPGTD,
23166
23167 IX86_BUILTIN_PMADDWD,
23168
23169 IX86_BUILTIN_PMAXSW,
23170 IX86_BUILTIN_PMAXUB,
23171 IX86_BUILTIN_PMINSW,
23172 IX86_BUILTIN_PMINUB,
23173
23174 IX86_BUILTIN_PMULHUW,
23175 IX86_BUILTIN_PMULHW,
23176 IX86_BUILTIN_PMULLW,
23177
23178 IX86_BUILTIN_PSADBW,
23179 IX86_BUILTIN_PSHUFW,
23180
23181 IX86_BUILTIN_PSLLW,
23182 IX86_BUILTIN_PSLLD,
23183 IX86_BUILTIN_PSLLQ,
23184 IX86_BUILTIN_PSRAW,
23185 IX86_BUILTIN_PSRAD,
23186 IX86_BUILTIN_PSRLW,
23187 IX86_BUILTIN_PSRLD,
23188 IX86_BUILTIN_PSRLQ,
23189 IX86_BUILTIN_PSLLWI,
23190 IX86_BUILTIN_PSLLDI,
23191 IX86_BUILTIN_PSLLQI,
23192 IX86_BUILTIN_PSRAWI,
23193 IX86_BUILTIN_PSRADI,
23194 IX86_BUILTIN_PSRLWI,
23195 IX86_BUILTIN_PSRLDI,
23196 IX86_BUILTIN_PSRLQI,
23197
23198 IX86_BUILTIN_PUNPCKHBW,
23199 IX86_BUILTIN_PUNPCKHWD,
23200 IX86_BUILTIN_PUNPCKHDQ,
23201 IX86_BUILTIN_PUNPCKLBW,
23202 IX86_BUILTIN_PUNPCKLWD,
23203 IX86_BUILTIN_PUNPCKLDQ,
23204
23205 IX86_BUILTIN_SHUFPS,
23206
23207 IX86_BUILTIN_RCPPS,
23208 IX86_BUILTIN_RCPSS,
23209 IX86_BUILTIN_RSQRTPS,
23210 IX86_BUILTIN_RSQRTPS_NR,
23211 IX86_BUILTIN_RSQRTSS,
23212 IX86_BUILTIN_RSQRTF,
23213 IX86_BUILTIN_SQRTPS,
23214 IX86_BUILTIN_SQRTPS_NR,
23215 IX86_BUILTIN_SQRTSS,
23216
23217 IX86_BUILTIN_UNPCKHPS,
23218 IX86_BUILTIN_UNPCKLPS,
23219
23220 IX86_BUILTIN_ANDPS,
23221 IX86_BUILTIN_ANDNPS,
23222 IX86_BUILTIN_ORPS,
23223 IX86_BUILTIN_XORPS,
23224
23225 IX86_BUILTIN_EMMS,
23226 IX86_BUILTIN_LDMXCSR,
23227 IX86_BUILTIN_STMXCSR,
23228 IX86_BUILTIN_SFENCE,
23229
23230 /* 3DNow! Original */
23231 IX86_BUILTIN_FEMMS,
23232 IX86_BUILTIN_PAVGUSB,
23233 IX86_BUILTIN_PF2ID,
23234 IX86_BUILTIN_PFACC,
23235 IX86_BUILTIN_PFADD,
23236 IX86_BUILTIN_PFCMPEQ,
23237 IX86_BUILTIN_PFCMPGE,
23238 IX86_BUILTIN_PFCMPGT,
23239 IX86_BUILTIN_PFMAX,
23240 IX86_BUILTIN_PFMIN,
23241 IX86_BUILTIN_PFMUL,
23242 IX86_BUILTIN_PFRCP,
23243 IX86_BUILTIN_PFRCPIT1,
23244 IX86_BUILTIN_PFRCPIT2,
23245 IX86_BUILTIN_PFRSQIT1,
23246 IX86_BUILTIN_PFRSQRT,
23247 IX86_BUILTIN_PFSUB,
23248 IX86_BUILTIN_PFSUBR,
23249 IX86_BUILTIN_PI2FD,
23250 IX86_BUILTIN_PMULHRW,
23251
23252 /* 3DNow! Athlon Extensions */
23253 IX86_BUILTIN_PF2IW,
23254 IX86_BUILTIN_PFNACC,
23255 IX86_BUILTIN_PFPNACC,
23256 IX86_BUILTIN_PI2FW,
23257 IX86_BUILTIN_PSWAPDSI,
23258 IX86_BUILTIN_PSWAPDSF,
23259
23260 /* SSE2 */
23261 IX86_BUILTIN_ADDPD,
23262 IX86_BUILTIN_ADDSD,
23263 IX86_BUILTIN_DIVPD,
23264 IX86_BUILTIN_DIVSD,
23265 IX86_BUILTIN_MULPD,
23266 IX86_BUILTIN_MULSD,
23267 IX86_BUILTIN_SUBPD,
23268 IX86_BUILTIN_SUBSD,
23269
23270 IX86_BUILTIN_CMPEQPD,
23271 IX86_BUILTIN_CMPLTPD,
23272 IX86_BUILTIN_CMPLEPD,
23273 IX86_BUILTIN_CMPGTPD,
23274 IX86_BUILTIN_CMPGEPD,
23275 IX86_BUILTIN_CMPNEQPD,
23276 IX86_BUILTIN_CMPNLTPD,
23277 IX86_BUILTIN_CMPNLEPD,
23278 IX86_BUILTIN_CMPNGTPD,
23279 IX86_BUILTIN_CMPNGEPD,
23280 IX86_BUILTIN_CMPORDPD,
23281 IX86_BUILTIN_CMPUNORDPD,
23282 IX86_BUILTIN_CMPEQSD,
23283 IX86_BUILTIN_CMPLTSD,
23284 IX86_BUILTIN_CMPLESD,
23285 IX86_BUILTIN_CMPNEQSD,
23286 IX86_BUILTIN_CMPNLTSD,
23287 IX86_BUILTIN_CMPNLESD,
23288 IX86_BUILTIN_CMPORDSD,
23289 IX86_BUILTIN_CMPUNORDSD,
23290
23291 IX86_BUILTIN_COMIEQSD,
23292 IX86_BUILTIN_COMILTSD,
23293 IX86_BUILTIN_COMILESD,
23294 IX86_BUILTIN_COMIGTSD,
23295 IX86_BUILTIN_COMIGESD,
23296 IX86_BUILTIN_COMINEQSD,
23297 IX86_BUILTIN_UCOMIEQSD,
23298 IX86_BUILTIN_UCOMILTSD,
23299 IX86_BUILTIN_UCOMILESD,
23300 IX86_BUILTIN_UCOMIGTSD,
23301 IX86_BUILTIN_UCOMIGESD,
23302 IX86_BUILTIN_UCOMINEQSD,
23303
23304 IX86_BUILTIN_MAXPD,
23305 IX86_BUILTIN_MAXSD,
23306 IX86_BUILTIN_MINPD,
23307 IX86_BUILTIN_MINSD,
23308
23309 IX86_BUILTIN_ANDPD,
23310 IX86_BUILTIN_ANDNPD,
23311 IX86_BUILTIN_ORPD,
23312 IX86_BUILTIN_XORPD,
23313
23314 IX86_BUILTIN_SQRTPD,
23315 IX86_BUILTIN_SQRTSD,
23316
23317 IX86_BUILTIN_UNPCKHPD,
23318 IX86_BUILTIN_UNPCKLPD,
23319
23320 IX86_BUILTIN_SHUFPD,
23321
23322 IX86_BUILTIN_LOADUPD,
23323 IX86_BUILTIN_STOREUPD,
23324 IX86_BUILTIN_MOVSD,
23325
23326 IX86_BUILTIN_LOADHPD,
23327 IX86_BUILTIN_LOADLPD,
23328
23329 IX86_BUILTIN_CVTDQ2PD,
23330 IX86_BUILTIN_CVTDQ2PS,
23331
23332 IX86_BUILTIN_CVTPD2DQ,
23333 IX86_BUILTIN_CVTPD2PI,
23334 IX86_BUILTIN_CVTPD2PS,
23335 IX86_BUILTIN_CVTTPD2DQ,
23336 IX86_BUILTIN_CVTTPD2PI,
23337
23338 IX86_BUILTIN_CVTPI2PD,
23339 IX86_BUILTIN_CVTSI2SD,
23340 IX86_BUILTIN_CVTSI642SD,
23341
23342 IX86_BUILTIN_CVTSD2SI,
23343 IX86_BUILTIN_CVTSD2SI64,
23344 IX86_BUILTIN_CVTSD2SS,
23345 IX86_BUILTIN_CVTSS2SD,
23346 IX86_BUILTIN_CVTTSD2SI,
23347 IX86_BUILTIN_CVTTSD2SI64,
23348
23349 IX86_BUILTIN_CVTPS2DQ,
23350 IX86_BUILTIN_CVTPS2PD,
23351 IX86_BUILTIN_CVTTPS2DQ,
23352
23353 IX86_BUILTIN_MOVNTI,
23354 IX86_BUILTIN_MOVNTPD,
23355 IX86_BUILTIN_MOVNTDQ,
23356
23357 IX86_BUILTIN_MOVQ128,
23358
23359 /* SSE2 MMX */
23360 IX86_BUILTIN_MASKMOVDQU,
23361 IX86_BUILTIN_MOVMSKPD,
23362 IX86_BUILTIN_PMOVMSKB128,
23363
23364 IX86_BUILTIN_PACKSSWB128,
23365 IX86_BUILTIN_PACKSSDW128,
23366 IX86_BUILTIN_PACKUSWB128,
23367
23368 IX86_BUILTIN_PADDB128,
23369 IX86_BUILTIN_PADDW128,
23370 IX86_BUILTIN_PADDD128,
23371 IX86_BUILTIN_PADDQ128,
23372 IX86_BUILTIN_PADDSB128,
23373 IX86_BUILTIN_PADDSW128,
23374 IX86_BUILTIN_PADDUSB128,
23375 IX86_BUILTIN_PADDUSW128,
23376 IX86_BUILTIN_PSUBB128,
23377 IX86_BUILTIN_PSUBW128,
23378 IX86_BUILTIN_PSUBD128,
23379 IX86_BUILTIN_PSUBQ128,
23380 IX86_BUILTIN_PSUBSB128,
23381 IX86_BUILTIN_PSUBSW128,
23382 IX86_BUILTIN_PSUBUSB128,
23383 IX86_BUILTIN_PSUBUSW128,
23384
23385 IX86_BUILTIN_PAND128,
23386 IX86_BUILTIN_PANDN128,
23387 IX86_BUILTIN_POR128,
23388 IX86_BUILTIN_PXOR128,
23389
23390 IX86_BUILTIN_PAVGB128,
23391 IX86_BUILTIN_PAVGW128,
23392
23393 IX86_BUILTIN_PCMPEQB128,
23394 IX86_BUILTIN_PCMPEQW128,
23395 IX86_BUILTIN_PCMPEQD128,
23396 IX86_BUILTIN_PCMPGTB128,
23397 IX86_BUILTIN_PCMPGTW128,
23398 IX86_BUILTIN_PCMPGTD128,
23399
23400 IX86_BUILTIN_PMADDWD128,
23401
23402 IX86_BUILTIN_PMAXSW128,
23403 IX86_BUILTIN_PMAXUB128,
23404 IX86_BUILTIN_PMINSW128,
23405 IX86_BUILTIN_PMINUB128,
23406
23407 IX86_BUILTIN_PMULUDQ,
23408 IX86_BUILTIN_PMULUDQ128,
23409 IX86_BUILTIN_PMULHUW128,
23410 IX86_BUILTIN_PMULHW128,
23411 IX86_BUILTIN_PMULLW128,
23412
23413 IX86_BUILTIN_PSADBW128,
23414 IX86_BUILTIN_PSHUFHW,
23415 IX86_BUILTIN_PSHUFLW,
23416 IX86_BUILTIN_PSHUFD,
23417
23418 IX86_BUILTIN_PSLLDQI128,
23419 IX86_BUILTIN_PSLLWI128,
23420 IX86_BUILTIN_PSLLDI128,
23421 IX86_BUILTIN_PSLLQI128,
23422 IX86_BUILTIN_PSRAWI128,
23423 IX86_BUILTIN_PSRADI128,
23424 IX86_BUILTIN_PSRLDQI128,
23425 IX86_BUILTIN_PSRLWI128,
23426 IX86_BUILTIN_PSRLDI128,
23427 IX86_BUILTIN_PSRLQI128,
23428
23429 IX86_BUILTIN_PSLLDQ128,
23430 IX86_BUILTIN_PSLLW128,
23431 IX86_BUILTIN_PSLLD128,
23432 IX86_BUILTIN_PSLLQ128,
23433 IX86_BUILTIN_PSRAW128,
23434 IX86_BUILTIN_PSRAD128,
23435 IX86_BUILTIN_PSRLW128,
23436 IX86_BUILTIN_PSRLD128,
23437 IX86_BUILTIN_PSRLQ128,
23438
23439 IX86_BUILTIN_PUNPCKHBW128,
23440 IX86_BUILTIN_PUNPCKHWD128,
23441 IX86_BUILTIN_PUNPCKHDQ128,
23442 IX86_BUILTIN_PUNPCKHQDQ128,
23443 IX86_BUILTIN_PUNPCKLBW128,
23444 IX86_BUILTIN_PUNPCKLWD128,
23445 IX86_BUILTIN_PUNPCKLDQ128,
23446 IX86_BUILTIN_PUNPCKLQDQ128,
23447
23448 IX86_BUILTIN_CLFLUSH,
23449 IX86_BUILTIN_MFENCE,
23450 IX86_BUILTIN_LFENCE,
23451 IX86_BUILTIN_PAUSE,
23452
23453 IX86_BUILTIN_BSRSI,
23454 IX86_BUILTIN_BSRDI,
23455 IX86_BUILTIN_RDPMC,
23456 IX86_BUILTIN_RDTSC,
23457 IX86_BUILTIN_RDTSCP,
23458 IX86_BUILTIN_ROLQI,
23459 IX86_BUILTIN_ROLHI,
23460 IX86_BUILTIN_RORQI,
23461 IX86_BUILTIN_RORHI,
23462
23463 /* SSE3. */
23464 IX86_BUILTIN_ADDSUBPS,
23465 IX86_BUILTIN_HADDPS,
23466 IX86_BUILTIN_HSUBPS,
23467 IX86_BUILTIN_MOVSHDUP,
23468 IX86_BUILTIN_MOVSLDUP,
23469 IX86_BUILTIN_ADDSUBPD,
23470 IX86_BUILTIN_HADDPD,
23471 IX86_BUILTIN_HSUBPD,
23472 IX86_BUILTIN_LDDQU,
23473
23474 IX86_BUILTIN_MONITOR,
23475 IX86_BUILTIN_MWAIT,
23476
23477 /* SSSE3. */
23478 IX86_BUILTIN_PHADDW,
23479 IX86_BUILTIN_PHADDD,
23480 IX86_BUILTIN_PHADDSW,
23481 IX86_BUILTIN_PHSUBW,
23482 IX86_BUILTIN_PHSUBD,
23483 IX86_BUILTIN_PHSUBSW,
23484 IX86_BUILTIN_PMADDUBSW,
23485 IX86_BUILTIN_PMULHRSW,
23486 IX86_BUILTIN_PSHUFB,
23487 IX86_BUILTIN_PSIGNB,
23488 IX86_BUILTIN_PSIGNW,
23489 IX86_BUILTIN_PSIGND,
23490 IX86_BUILTIN_PALIGNR,
23491 IX86_BUILTIN_PABSB,
23492 IX86_BUILTIN_PABSW,
23493 IX86_BUILTIN_PABSD,
23494
23495 IX86_BUILTIN_PHADDW128,
23496 IX86_BUILTIN_PHADDD128,
23497 IX86_BUILTIN_PHADDSW128,
23498 IX86_BUILTIN_PHSUBW128,
23499 IX86_BUILTIN_PHSUBD128,
23500 IX86_BUILTIN_PHSUBSW128,
23501 IX86_BUILTIN_PMADDUBSW128,
23502 IX86_BUILTIN_PMULHRSW128,
23503 IX86_BUILTIN_PSHUFB128,
23504 IX86_BUILTIN_PSIGNB128,
23505 IX86_BUILTIN_PSIGNW128,
23506 IX86_BUILTIN_PSIGND128,
23507 IX86_BUILTIN_PALIGNR128,
23508 IX86_BUILTIN_PABSB128,
23509 IX86_BUILTIN_PABSW128,
23510 IX86_BUILTIN_PABSD128,
23511
23512 /* AMDFAM10 - SSE4A New Instructions. */
23513 IX86_BUILTIN_MOVNTSD,
23514 IX86_BUILTIN_MOVNTSS,
23515 IX86_BUILTIN_EXTRQI,
23516 IX86_BUILTIN_EXTRQ,
23517 IX86_BUILTIN_INSERTQI,
23518 IX86_BUILTIN_INSERTQ,
23519
23520 /* SSE4.1. */
23521 IX86_BUILTIN_BLENDPD,
23522 IX86_BUILTIN_BLENDPS,
23523 IX86_BUILTIN_BLENDVPD,
23524 IX86_BUILTIN_BLENDVPS,
23525 IX86_BUILTIN_PBLENDVB128,
23526 IX86_BUILTIN_PBLENDW128,
23527
23528 IX86_BUILTIN_DPPD,
23529 IX86_BUILTIN_DPPS,
23530
23531 IX86_BUILTIN_INSERTPS128,
23532
23533 IX86_BUILTIN_MOVNTDQA,
23534 IX86_BUILTIN_MPSADBW128,
23535 IX86_BUILTIN_PACKUSDW128,
23536 IX86_BUILTIN_PCMPEQQ,
23537 IX86_BUILTIN_PHMINPOSUW128,
23538
23539 IX86_BUILTIN_PMAXSB128,
23540 IX86_BUILTIN_PMAXSD128,
23541 IX86_BUILTIN_PMAXUD128,
23542 IX86_BUILTIN_PMAXUW128,
23543
23544 IX86_BUILTIN_PMINSB128,
23545 IX86_BUILTIN_PMINSD128,
23546 IX86_BUILTIN_PMINUD128,
23547 IX86_BUILTIN_PMINUW128,
23548
23549 IX86_BUILTIN_PMOVSXBW128,
23550 IX86_BUILTIN_PMOVSXBD128,
23551 IX86_BUILTIN_PMOVSXBQ128,
23552 IX86_BUILTIN_PMOVSXWD128,
23553 IX86_BUILTIN_PMOVSXWQ128,
23554 IX86_BUILTIN_PMOVSXDQ128,
23555
23556 IX86_BUILTIN_PMOVZXBW128,
23557 IX86_BUILTIN_PMOVZXBD128,
23558 IX86_BUILTIN_PMOVZXBQ128,
23559 IX86_BUILTIN_PMOVZXWD128,
23560 IX86_BUILTIN_PMOVZXWQ128,
23561 IX86_BUILTIN_PMOVZXDQ128,
23562
23563 IX86_BUILTIN_PMULDQ128,
23564 IX86_BUILTIN_PMULLD128,
23565
23566 IX86_BUILTIN_ROUNDPD,
23567 IX86_BUILTIN_ROUNDPS,
23568 IX86_BUILTIN_ROUNDSD,
23569 IX86_BUILTIN_ROUNDSS,
23570
23571 IX86_BUILTIN_FLOORPD,
23572 IX86_BUILTIN_CEILPD,
23573 IX86_BUILTIN_TRUNCPD,
23574 IX86_BUILTIN_RINTPD,
23575 IX86_BUILTIN_FLOORPS,
23576 IX86_BUILTIN_CEILPS,
23577 IX86_BUILTIN_TRUNCPS,
23578 IX86_BUILTIN_RINTPS,
23579
23580 IX86_BUILTIN_PTESTZ,
23581 IX86_BUILTIN_PTESTC,
23582 IX86_BUILTIN_PTESTNZC,
23583
23584 IX86_BUILTIN_VEC_INIT_V2SI,
23585 IX86_BUILTIN_VEC_INIT_V4HI,
23586 IX86_BUILTIN_VEC_INIT_V8QI,
23587 IX86_BUILTIN_VEC_EXT_V2DF,
23588 IX86_BUILTIN_VEC_EXT_V2DI,
23589 IX86_BUILTIN_VEC_EXT_V4SF,
23590 IX86_BUILTIN_VEC_EXT_V4SI,
23591 IX86_BUILTIN_VEC_EXT_V8HI,
23592 IX86_BUILTIN_VEC_EXT_V2SI,
23593 IX86_BUILTIN_VEC_EXT_V4HI,
23594 IX86_BUILTIN_VEC_EXT_V16QI,
23595 IX86_BUILTIN_VEC_SET_V2DI,
23596 IX86_BUILTIN_VEC_SET_V4SF,
23597 IX86_BUILTIN_VEC_SET_V4SI,
23598 IX86_BUILTIN_VEC_SET_V8HI,
23599 IX86_BUILTIN_VEC_SET_V4HI,
23600 IX86_BUILTIN_VEC_SET_V16QI,
23601
23602 IX86_BUILTIN_VEC_PACK_SFIX,
23603
23604 /* SSE4.2. */
23605 IX86_BUILTIN_CRC32QI,
23606 IX86_BUILTIN_CRC32HI,
23607 IX86_BUILTIN_CRC32SI,
23608 IX86_BUILTIN_CRC32DI,
23609
23610 IX86_BUILTIN_PCMPESTRI128,
23611 IX86_BUILTIN_PCMPESTRM128,
23612 IX86_BUILTIN_PCMPESTRA128,
23613 IX86_BUILTIN_PCMPESTRC128,
23614 IX86_BUILTIN_PCMPESTRO128,
23615 IX86_BUILTIN_PCMPESTRS128,
23616 IX86_BUILTIN_PCMPESTRZ128,
23617 IX86_BUILTIN_PCMPISTRI128,
23618 IX86_BUILTIN_PCMPISTRM128,
23619 IX86_BUILTIN_PCMPISTRA128,
23620 IX86_BUILTIN_PCMPISTRC128,
23621 IX86_BUILTIN_PCMPISTRO128,
23622 IX86_BUILTIN_PCMPISTRS128,
23623 IX86_BUILTIN_PCMPISTRZ128,
23624
23625 IX86_BUILTIN_PCMPGTQ,
23626
23627 /* AES instructions */
23628 IX86_BUILTIN_AESENC128,
23629 IX86_BUILTIN_AESENCLAST128,
23630 IX86_BUILTIN_AESDEC128,
23631 IX86_BUILTIN_AESDECLAST128,
23632 IX86_BUILTIN_AESIMC128,
23633 IX86_BUILTIN_AESKEYGENASSIST128,
23634
23635 /* PCLMUL instruction */
23636 IX86_BUILTIN_PCLMULQDQ128,
23637
23638 /* AVX */
23639 IX86_BUILTIN_ADDPD256,
23640 IX86_BUILTIN_ADDPS256,
23641 IX86_BUILTIN_ADDSUBPD256,
23642 IX86_BUILTIN_ADDSUBPS256,
23643 IX86_BUILTIN_ANDPD256,
23644 IX86_BUILTIN_ANDPS256,
23645 IX86_BUILTIN_ANDNPD256,
23646 IX86_BUILTIN_ANDNPS256,
23647 IX86_BUILTIN_BLENDPD256,
23648 IX86_BUILTIN_BLENDPS256,
23649 IX86_BUILTIN_BLENDVPD256,
23650 IX86_BUILTIN_BLENDVPS256,
23651 IX86_BUILTIN_DIVPD256,
23652 IX86_BUILTIN_DIVPS256,
23653 IX86_BUILTIN_DPPS256,
23654 IX86_BUILTIN_HADDPD256,
23655 IX86_BUILTIN_HADDPS256,
23656 IX86_BUILTIN_HSUBPD256,
23657 IX86_BUILTIN_HSUBPS256,
23658 IX86_BUILTIN_MAXPD256,
23659 IX86_BUILTIN_MAXPS256,
23660 IX86_BUILTIN_MINPD256,
23661 IX86_BUILTIN_MINPS256,
23662 IX86_BUILTIN_MULPD256,
23663 IX86_BUILTIN_MULPS256,
23664 IX86_BUILTIN_ORPD256,
23665 IX86_BUILTIN_ORPS256,
23666 IX86_BUILTIN_SHUFPD256,
23667 IX86_BUILTIN_SHUFPS256,
23668 IX86_BUILTIN_SUBPD256,
23669 IX86_BUILTIN_SUBPS256,
23670 IX86_BUILTIN_XORPD256,
23671 IX86_BUILTIN_XORPS256,
23672 IX86_BUILTIN_CMPSD,
23673 IX86_BUILTIN_CMPSS,
23674 IX86_BUILTIN_CMPPD,
23675 IX86_BUILTIN_CMPPS,
23676 IX86_BUILTIN_CMPPD256,
23677 IX86_BUILTIN_CMPPS256,
23678 IX86_BUILTIN_CVTDQ2PD256,
23679 IX86_BUILTIN_CVTDQ2PS256,
23680 IX86_BUILTIN_CVTPD2PS256,
23681 IX86_BUILTIN_CVTPS2DQ256,
23682 IX86_BUILTIN_CVTPS2PD256,
23683 IX86_BUILTIN_CVTTPD2DQ256,
23684 IX86_BUILTIN_CVTPD2DQ256,
23685 IX86_BUILTIN_CVTTPS2DQ256,
23686 IX86_BUILTIN_EXTRACTF128PD256,
23687 IX86_BUILTIN_EXTRACTF128PS256,
23688 IX86_BUILTIN_EXTRACTF128SI256,
23689 IX86_BUILTIN_VZEROALL,
23690 IX86_BUILTIN_VZEROUPPER,
23691 IX86_BUILTIN_VPERMILVARPD,
23692 IX86_BUILTIN_VPERMILVARPS,
23693 IX86_BUILTIN_VPERMILVARPD256,
23694 IX86_BUILTIN_VPERMILVARPS256,
23695 IX86_BUILTIN_VPERMILPD,
23696 IX86_BUILTIN_VPERMILPS,
23697 IX86_BUILTIN_VPERMILPD256,
23698 IX86_BUILTIN_VPERMILPS256,
23699 IX86_BUILTIN_VPERMIL2PD,
23700 IX86_BUILTIN_VPERMIL2PS,
23701 IX86_BUILTIN_VPERMIL2PD256,
23702 IX86_BUILTIN_VPERMIL2PS256,
23703 IX86_BUILTIN_VPERM2F128PD256,
23704 IX86_BUILTIN_VPERM2F128PS256,
23705 IX86_BUILTIN_VPERM2F128SI256,
23706 IX86_BUILTIN_VBROADCASTSS,
23707 IX86_BUILTIN_VBROADCASTSD256,
23708 IX86_BUILTIN_VBROADCASTSS256,
23709 IX86_BUILTIN_VBROADCASTPD256,
23710 IX86_BUILTIN_VBROADCASTPS256,
23711 IX86_BUILTIN_VINSERTF128PD256,
23712 IX86_BUILTIN_VINSERTF128PS256,
23713 IX86_BUILTIN_VINSERTF128SI256,
23714 IX86_BUILTIN_LOADUPD256,
23715 IX86_BUILTIN_LOADUPS256,
23716 IX86_BUILTIN_STOREUPD256,
23717 IX86_BUILTIN_STOREUPS256,
23718 IX86_BUILTIN_LDDQU256,
23719 IX86_BUILTIN_MOVNTDQ256,
23720 IX86_BUILTIN_MOVNTPD256,
23721 IX86_BUILTIN_MOVNTPS256,
23722 IX86_BUILTIN_LOADDQU256,
23723 IX86_BUILTIN_STOREDQU256,
23724 IX86_BUILTIN_MASKLOADPD,
23725 IX86_BUILTIN_MASKLOADPS,
23726 IX86_BUILTIN_MASKSTOREPD,
23727 IX86_BUILTIN_MASKSTOREPS,
23728 IX86_BUILTIN_MASKLOADPD256,
23729 IX86_BUILTIN_MASKLOADPS256,
23730 IX86_BUILTIN_MASKSTOREPD256,
23731 IX86_BUILTIN_MASKSTOREPS256,
23732 IX86_BUILTIN_MOVSHDUP256,
23733 IX86_BUILTIN_MOVSLDUP256,
23734 IX86_BUILTIN_MOVDDUP256,
23735
23736 IX86_BUILTIN_SQRTPD256,
23737 IX86_BUILTIN_SQRTPS256,
23738 IX86_BUILTIN_SQRTPS_NR256,
23739 IX86_BUILTIN_RSQRTPS256,
23740 IX86_BUILTIN_RSQRTPS_NR256,
23741
23742 IX86_BUILTIN_RCPPS256,
23743
23744 IX86_BUILTIN_ROUNDPD256,
23745 IX86_BUILTIN_ROUNDPS256,
23746
23747 IX86_BUILTIN_FLOORPD256,
23748 IX86_BUILTIN_CEILPD256,
23749 IX86_BUILTIN_TRUNCPD256,
23750 IX86_BUILTIN_RINTPD256,
23751 IX86_BUILTIN_FLOORPS256,
23752 IX86_BUILTIN_CEILPS256,
23753 IX86_BUILTIN_TRUNCPS256,
23754 IX86_BUILTIN_RINTPS256,
23755
23756 IX86_BUILTIN_UNPCKHPD256,
23757 IX86_BUILTIN_UNPCKLPD256,
23758 IX86_BUILTIN_UNPCKHPS256,
23759 IX86_BUILTIN_UNPCKLPS256,
23760
23761 IX86_BUILTIN_SI256_SI,
23762 IX86_BUILTIN_PS256_PS,
23763 IX86_BUILTIN_PD256_PD,
23764 IX86_BUILTIN_SI_SI256,
23765 IX86_BUILTIN_PS_PS256,
23766 IX86_BUILTIN_PD_PD256,
23767
23768 IX86_BUILTIN_VTESTZPD,
23769 IX86_BUILTIN_VTESTCPD,
23770 IX86_BUILTIN_VTESTNZCPD,
23771 IX86_BUILTIN_VTESTZPS,
23772 IX86_BUILTIN_VTESTCPS,
23773 IX86_BUILTIN_VTESTNZCPS,
23774 IX86_BUILTIN_VTESTZPD256,
23775 IX86_BUILTIN_VTESTCPD256,
23776 IX86_BUILTIN_VTESTNZCPD256,
23777 IX86_BUILTIN_VTESTZPS256,
23778 IX86_BUILTIN_VTESTCPS256,
23779 IX86_BUILTIN_VTESTNZCPS256,
23780 IX86_BUILTIN_PTESTZ256,
23781 IX86_BUILTIN_PTESTC256,
23782 IX86_BUILTIN_PTESTNZC256,
23783
23784 IX86_BUILTIN_MOVMSKPD256,
23785 IX86_BUILTIN_MOVMSKPS256,
23786
23787 /* TFmode support builtins. */
23788 IX86_BUILTIN_INFQ,
23789 IX86_BUILTIN_HUGE_VALQ,
23790 IX86_BUILTIN_FABSQ,
23791 IX86_BUILTIN_COPYSIGNQ,
23792
23793 /* Vectorizer support builtins. */
23794 IX86_BUILTIN_CPYSGNPS,
23795 IX86_BUILTIN_CPYSGNPD,
23796 IX86_BUILTIN_CPYSGNPS256,
23797 IX86_BUILTIN_CPYSGNPD256,
23798
23799 IX86_BUILTIN_CVTUDQ2PS,
23800
23801 IX86_BUILTIN_VEC_PERM_V2DF,
23802 IX86_BUILTIN_VEC_PERM_V4SF,
23803 IX86_BUILTIN_VEC_PERM_V2DI,
23804 IX86_BUILTIN_VEC_PERM_V4SI,
23805 IX86_BUILTIN_VEC_PERM_V8HI,
23806 IX86_BUILTIN_VEC_PERM_V16QI,
23807 IX86_BUILTIN_VEC_PERM_V2DI_U,
23808 IX86_BUILTIN_VEC_PERM_V4SI_U,
23809 IX86_BUILTIN_VEC_PERM_V8HI_U,
23810 IX86_BUILTIN_VEC_PERM_V16QI_U,
23811 IX86_BUILTIN_VEC_PERM_V4DF,
23812 IX86_BUILTIN_VEC_PERM_V8SF,
23813
23814 /* FMA4 and XOP instructions. */
23815 IX86_BUILTIN_VFMADDSS,
23816 IX86_BUILTIN_VFMADDSD,
23817 IX86_BUILTIN_VFMADDPS,
23818 IX86_BUILTIN_VFMADDPD,
23819 IX86_BUILTIN_VFMADDPS256,
23820 IX86_BUILTIN_VFMADDPD256,
23821 IX86_BUILTIN_VFMADDSUBPS,
23822 IX86_BUILTIN_VFMADDSUBPD,
23823 IX86_BUILTIN_VFMADDSUBPS256,
23824 IX86_BUILTIN_VFMADDSUBPD256,
23825
23826 IX86_BUILTIN_VPCMOV,
23827 IX86_BUILTIN_VPCMOV_V2DI,
23828 IX86_BUILTIN_VPCMOV_V4SI,
23829 IX86_BUILTIN_VPCMOV_V8HI,
23830 IX86_BUILTIN_VPCMOV_V16QI,
23831 IX86_BUILTIN_VPCMOV_V4SF,
23832 IX86_BUILTIN_VPCMOV_V2DF,
23833 IX86_BUILTIN_VPCMOV256,
23834 IX86_BUILTIN_VPCMOV_V4DI256,
23835 IX86_BUILTIN_VPCMOV_V8SI256,
23836 IX86_BUILTIN_VPCMOV_V16HI256,
23837 IX86_BUILTIN_VPCMOV_V32QI256,
23838 IX86_BUILTIN_VPCMOV_V8SF256,
23839 IX86_BUILTIN_VPCMOV_V4DF256,
23840
23841 IX86_BUILTIN_VPPERM,
23842
23843 IX86_BUILTIN_VPMACSSWW,
23844 IX86_BUILTIN_VPMACSWW,
23845 IX86_BUILTIN_VPMACSSWD,
23846 IX86_BUILTIN_VPMACSWD,
23847 IX86_BUILTIN_VPMACSSDD,
23848 IX86_BUILTIN_VPMACSDD,
23849 IX86_BUILTIN_VPMACSSDQL,
23850 IX86_BUILTIN_VPMACSSDQH,
23851 IX86_BUILTIN_VPMACSDQL,
23852 IX86_BUILTIN_VPMACSDQH,
23853 IX86_BUILTIN_VPMADCSSWD,
23854 IX86_BUILTIN_VPMADCSWD,
23855
23856 IX86_BUILTIN_VPHADDBW,
23857 IX86_BUILTIN_VPHADDBD,
23858 IX86_BUILTIN_VPHADDBQ,
23859 IX86_BUILTIN_VPHADDWD,
23860 IX86_BUILTIN_VPHADDWQ,
23861 IX86_BUILTIN_VPHADDDQ,
23862 IX86_BUILTIN_VPHADDUBW,
23863 IX86_BUILTIN_VPHADDUBD,
23864 IX86_BUILTIN_VPHADDUBQ,
23865 IX86_BUILTIN_VPHADDUWD,
23866 IX86_BUILTIN_VPHADDUWQ,
23867 IX86_BUILTIN_VPHADDUDQ,
23868 IX86_BUILTIN_VPHSUBBW,
23869 IX86_BUILTIN_VPHSUBWD,
23870 IX86_BUILTIN_VPHSUBDQ,
23871
23872 IX86_BUILTIN_VPROTB,
23873 IX86_BUILTIN_VPROTW,
23874 IX86_BUILTIN_VPROTD,
23875 IX86_BUILTIN_VPROTQ,
23876 IX86_BUILTIN_VPROTB_IMM,
23877 IX86_BUILTIN_VPROTW_IMM,
23878 IX86_BUILTIN_VPROTD_IMM,
23879 IX86_BUILTIN_VPROTQ_IMM,
23880
23881 IX86_BUILTIN_VPSHLB,
23882 IX86_BUILTIN_VPSHLW,
23883 IX86_BUILTIN_VPSHLD,
23884 IX86_BUILTIN_VPSHLQ,
23885 IX86_BUILTIN_VPSHAB,
23886 IX86_BUILTIN_VPSHAW,
23887 IX86_BUILTIN_VPSHAD,
23888 IX86_BUILTIN_VPSHAQ,
23889
23890 IX86_BUILTIN_VFRCZSS,
23891 IX86_BUILTIN_VFRCZSD,
23892 IX86_BUILTIN_VFRCZPS,
23893 IX86_BUILTIN_VFRCZPD,
23894 IX86_BUILTIN_VFRCZPS256,
23895 IX86_BUILTIN_VFRCZPD256,
23896
23897 IX86_BUILTIN_VPCOMEQUB,
23898 IX86_BUILTIN_VPCOMNEUB,
23899 IX86_BUILTIN_VPCOMLTUB,
23900 IX86_BUILTIN_VPCOMLEUB,
23901 IX86_BUILTIN_VPCOMGTUB,
23902 IX86_BUILTIN_VPCOMGEUB,
23903 IX86_BUILTIN_VPCOMFALSEUB,
23904 IX86_BUILTIN_VPCOMTRUEUB,
23905
23906 IX86_BUILTIN_VPCOMEQUW,
23907 IX86_BUILTIN_VPCOMNEUW,
23908 IX86_BUILTIN_VPCOMLTUW,
23909 IX86_BUILTIN_VPCOMLEUW,
23910 IX86_BUILTIN_VPCOMGTUW,
23911 IX86_BUILTIN_VPCOMGEUW,
23912 IX86_BUILTIN_VPCOMFALSEUW,
23913 IX86_BUILTIN_VPCOMTRUEUW,
23914
23915 IX86_BUILTIN_VPCOMEQUD,
23916 IX86_BUILTIN_VPCOMNEUD,
23917 IX86_BUILTIN_VPCOMLTUD,
23918 IX86_BUILTIN_VPCOMLEUD,
23919 IX86_BUILTIN_VPCOMGTUD,
23920 IX86_BUILTIN_VPCOMGEUD,
23921 IX86_BUILTIN_VPCOMFALSEUD,
23922 IX86_BUILTIN_VPCOMTRUEUD,
23923
23924 IX86_BUILTIN_VPCOMEQUQ,
23925 IX86_BUILTIN_VPCOMNEUQ,
23926 IX86_BUILTIN_VPCOMLTUQ,
23927 IX86_BUILTIN_VPCOMLEUQ,
23928 IX86_BUILTIN_VPCOMGTUQ,
23929 IX86_BUILTIN_VPCOMGEUQ,
23930 IX86_BUILTIN_VPCOMFALSEUQ,
23931 IX86_BUILTIN_VPCOMTRUEUQ,
23932
23933 IX86_BUILTIN_VPCOMEQB,
23934 IX86_BUILTIN_VPCOMNEB,
23935 IX86_BUILTIN_VPCOMLTB,
23936 IX86_BUILTIN_VPCOMLEB,
23937 IX86_BUILTIN_VPCOMGTB,
23938 IX86_BUILTIN_VPCOMGEB,
23939 IX86_BUILTIN_VPCOMFALSEB,
23940 IX86_BUILTIN_VPCOMTRUEB,
23941
23942 IX86_BUILTIN_VPCOMEQW,
23943 IX86_BUILTIN_VPCOMNEW,
23944 IX86_BUILTIN_VPCOMLTW,
23945 IX86_BUILTIN_VPCOMLEW,
23946 IX86_BUILTIN_VPCOMGTW,
23947 IX86_BUILTIN_VPCOMGEW,
23948 IX86_BUILTIN_VPCOMFALSEW,
23949 IX86_BUILTIN_VPCOMTRUEW,
23950
23951 IX86_BUILTIN_VPCOMEQD,
23952 IX86_BUILTIN_VPCOMNED,
23953 IX86_BUILTIN_VPCOMLTD,
23954 IX86_BUILTIN_VPCOMLED,
23955 IX86_BUILTIN_VPCOMGTD,
23956 IX86_BUILTIN_VPCOMGED,
23957 IX86_BUILTIN_VPCOMFALSED,
23958 IX86_BUILTIN_VPCOMTRUED,
23959
23960 IX86_BUILTIN_VPCOMEQQ,
23961 IX86_BUILTIN_VPCOMNEQ,
23962 IX86_BUILTIN_VPCOMLTQ,
23963 IX86_BUILTIN_VPCOMLEQ,
23964 IX86_BUILTIN_VPCOMGTQ,
23965 IX86_BUILTIN_VPCOMGEQ,
23966 IX86_BUILTIN_VPCOMFALSEQ,
23967 IX86_BUILTIN_VPCOMTRUEQ,
23968
23969 /* LWP instructions. */
23970 IX86_BUILTIN_LLWPCB,
23971 IX86_BUILTIN_SLWPCB,
23972 IX86_BUILTIN_LWPVAL32,
23973 IX86_BUILTIN_LWPVAL64,
23974 IX86_BUILTIN_LWPINS32,
23975 IX86_BUILTIN_LWPINS64,
23976
23977 IX86_BUILTIN_CLZS,
23978
23979 /* BMI instructions. */
23980 IX86_BUILTIN_BEXTR32,
23981 IX86_BUILTIN_BEXTR64,
23982 IX86_BUILTIN_CTZS,
23983
23984 /* TBM instructions. */
23985 IX86_BUILTIN_BEXTRI32,
23986 IX86_BUILTIN_BEXTRI64,
23987
23989 /* FSGSBASE instructions. */
23990 IX86_BUILTIN_RDFSBASE32,
23991 IX86_BUILTIN_RDFSBASE64,
23992 IX86_BUILTIN_RDGSBASE32,
23993 IX86_BUILTIN_RDGSBASE64,
23994 IX86_BUILTIN_WRFSBASE32,
23995 IX86_BUILTIN_WRFSBASE64,
23996 IX86_BUILTIN_WRGSBASE32,
23997 IX86_BUILTIN_WRGSBASE64,
23998
23999 /* RDRND instructions. */
24000 IX86_BUILTIN_RDRAND16_STEP,
24001 IX86_BUILTIN_RDRAND32_STEP,
24002 IX86_BUILTIN_RDRAND64_STEP,
24003
24004 /* F16C instructions. */
24005 IX86_BUILTIN_CVTPH2PS,
24006 IX86_BUILTIN_CVTPH2PS256,
24007 IX86_BUILTIN_CVTPS2PH,
24008 IX86_BUILTIN_CVTPS2PH256,
24009
24010 /* CFString built-in for Darwin. */
24011 IX86_BUILTIN_CFSTRING,
24012
24013 IX86_BUILTIN_MAX
24014 };
24015
24016 /* Table for the ix86 builtin decls. */
24017 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
24018
24019 /* Table of all of the builtin functions that are possible with different ISAs
24020 but are waiting to be built until a function is declared to use that
24021 ISA. */
24022 struct builtin_isa {
24023 const char *name; /* function name */
24024 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
24025 int isa; /* isa_flags this builtin is defined for */
24026 bool const_p; /* true if the declaration is constant */
24027 bool set_and_not_built_p; /* true if the builtin was recorded but its decl has not been built yet */
24028 };
24029
24030 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
24031
24032
24033 /* Add an ix86 target builtin function with CODE, NAME and TCODE.  Save MASK,
24034 the set of isa_flags this builtin requires, in the ix86_builtins_isa array.
24035 Store the function decl in the ix86_builtins array.  Return the function
24036 decl, or NULL_TREE if the builtin was not added.
24037
24038 If the front end has a special hook for builtin functions, delay adding
24039 builtin functions that aren't in the current ISA until the ISA is changed
24040 with function specific optimization.  Doing so can save about 300K for the
24041 default compiler.  When such a builtin is expanded, whether it is valid is
24042 checked at that time.
24043
24044 If the front end doesn't have a special hook, record all builtins, even
24045 those whose instruction set isn't in the current ISA, in case the user uses
24046 function specific options for a different ISA, so that we don't get scope
24047 errors if a builtin is added in the middle of a function scope. */
24048
24049 static inline tree
24050 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
24051 enum ix86_builtins code)
24052 {
24053 tree decl = NULL_TREE;
24054
24055 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
24056 {
24057 ix86_builtins_isa[(int) code].isa = mask;
24058
24059 mask &= ~OPTION_MASK_ISA_64BIT;
24060 if (mask == 0
24061 || (mask & ix86_isa_flags) != 0
24062 || (lang_hooks.builtin_function
24063 == lang_hooks.builtin_function_ext_scope))
24065 {
24066 tree type = ix86_get_builtin_func_type (tcode);
24067 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24068 NULL, NULL_TREE);
24069 ix86_builtins[(int) code] = decl;
24070 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24071 }
24072 else
24073 {
24074 ix86_builtins[(int) code] = NULL_TREE;
24075 ix86_builtins_isa[(int) code].tcode = tcode;
24076 ix86_builtins_isa[(int) code].name = name;
24077 ix86_builtins_isa[(int) code].const_p = false;
24078 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
24079 }
24080 }
24081
24082 return decl;
24083 }
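
/* Illustrative only: a direct call mirroring the bdesc_special_args entry for
   lfence further below would look like

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence",
                  VOID_FTYPE_VOID, IX86_BUILTIN_LFENCE);

   If the requested ISA is not enabled at this point and the front end's
   builtin_function hook is not the ext-scope variant, the decl is only
   recorded in ix86_builtins_isa and is materialized later by
   ix86_add_new_builtins.  */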
24084
24085 /* Like def_builtin, but also marks the function decl "const". */
24086
24087 static inline tree
24088 def_builtin_const (int mask, const char *name,
24089 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
24090 {
24091 tree decl = def_builtin (mask, name, tcode, code);
24092 if (decl)
24093 TREE_READONLY (decl) = 1;
24094 else
24095 ix86_builtins_isa[(int) code].const_p = true;
24096
24097 return decl;
24098 }
24099
24100 /* Add any new builtin functions for a given ISA that may not have been
24101 declared yet.  This saves a bit of space compared to adding all of the
24102 declarations to the tree up front, whether or not they are used. */
24103
24104 static void
24105 ix86_add_new_builtins (int isa)
24106 {
24107 int i;
24108
24109 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
24110 {
24111 if ((ix86_builtins_isa[i].isa & isa) != 0
24112 && ix86_builtins_isa[i].set_and_not_built_p)
24113 {
24114 tree decl, type;
24115
24116 /* Don't define the builtin again. */
24117 ix86_builtins_isa[i].set_and_not_built_p = false;
24118
24119 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
24120 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
24121 type, i, BUILT_IN_MD, NULL,
24122 NULL_TREE);
24123
24124 ix86_builtins[i] = decl;
24125 if (ix86_builtins_isa[i].const_p)
24126 TREE_READONLY (decl) = 1;
24127 }
24128 }
24129 }
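
/* Illustrative only: when AVX is not enabled on the command line, the AVX
   builtins above are merely recorded by def_builtin; a later function such as

     __attribute__((target ("avx")))
     void use_avx (void) { ... }

   switches ix86_isa_flags for that function, and this routine is then used to
   materialize the deferred AVX builtin decls so they are in scope for it.  */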
24130
24131 /* Bits for builtin_description.flag. */
24132
24133 /* Set when we don't support the comparison natively, and should
24134 swap the comparison operands in order to support it. */
24135 #define BUILTIN_DESC_SWAP_OPERANDS 1
24136
24137 struct builtin_description
24138 {
24139 const unsigned int mask;
24140 const enum insn_code icode;
24141 const char *const name;
24142 const enum ix86_builtins code;
24143 const enum rtx_code comparison;
24144 const int flag;
24145 };
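
/* The bdesc_* tables below are consumed later in this file (in
   ix86_init_mmx_sse_builtins) by loops of roughly this shape:

     for (i = 0, d = bdesc_special_args;
          i < ARRAY_SIZE (bdesc_special_args);
          i++, d++)
       {
         if (d->name == 0)
           continue;
         ftype = (enum ix86_builtin_func_type) d->flag;
         def_builtin (d->mask, d->name, ftype, d->code);
       }

   i.e. FLAG encodes the ix86_builtin_func_type for most tables, while for
   bdesc_comi, bdesc_pcmpestr and bdesc_pcmpistr the COMPARISON and FLAG
   fields carry the rtx comparison code or CC mode used at expansion time.  */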
24146
24147 static const struct builtin_description bdesc_comi[] =
24148 {
24149 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
24150 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
24151 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
24152 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
24153 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
24154 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
24155 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
24156 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
24157 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
24158 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
24159 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
24160 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
24161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
24162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
24163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
24164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
24165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
24166 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
24167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
24168 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
24169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
24170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
24171 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
24172 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
24173 };
24174
24175 static const struct builtin_description bdesc_pcmpestr[] =
24176 {
24177 /* SSE4.2 */
24178 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
24179 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
24180 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
24181 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
24182 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
24183 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
24184 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
24185 };
24186
24187 static const struct builtin_description bdesc_pcmpistr[] =
24188 {
24189 /* SSE4.2 */
24190 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
24191 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
24192 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
24193 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
24194 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
24195 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
24196 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
24197 };
24198
24199 /* Special builtins with a variable number of arguments. */
24200 static const struct builtin_description bdesc_special_args[] =
24201 {
24202 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
24203 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
24204 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
24205
24206 /* MMX */
24207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24208
24209 /* 3DNow! */
24210 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24211
24212 /* SSE */
24213 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24214 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24215 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24216
24217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24220 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24221
24222 /* SSE or 3DNow!A */
24223 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24224 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
24225
24226 /* SSE2 */
24227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
24231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
24233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
24234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
24235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24236
24237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24238 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24239
24240 /* SSE3 */
24241 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24242
24243 /* SSE4.1 */
24244 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
24245
24246 /* SSE4A */
24247 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24248 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24249
24250 /* AVX */
24251 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
24252 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
24253
24254 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24255 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24256 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24257 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
24258 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
24259
24260 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24261 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24262 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24263 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24264 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24265 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
24266 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24267
24268 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
24269 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24270 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24271
24272 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
24273 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
24274 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
24275 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
24276 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
24277 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
24278 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
24279 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
24280
24281 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
24282 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
24283 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
24284 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
24285 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
24286 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
24287
24288 /* FSGSBASE */
24289 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24290 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24291 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24292 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24293 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24294 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24295 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24296 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24297 };
24298
24299 /* Builtins with a variable number of arguments. */
24300 static const struct builtin_description bdesc_args[] =
24301 {
24302 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
24303 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
24304 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
24305 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24306 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24307 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24308 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24309
24310 /* MMX */
24311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24315 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24317
24318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24320 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24321 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24322 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24323 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24325 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24326
24327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24329
24330 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24331 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24332 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24334
24335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24337 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24339 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24341
24342 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24343 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24345 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
24347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
24348
24349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24350 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
24351 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24352
24353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
24354
24355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24357 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24359 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24361
24362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24364 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24365 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24366 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24368
24369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24370 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24371 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24372 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24373
24374 /* 3DNow! */
24375 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24376 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24377 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24378 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24379
24380 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24381 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24382 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24383 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24384 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24385 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24386 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24387 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24388 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24389 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24390 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24391 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24392 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24393 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24394 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24395
24396 /* 3DNow!A */
24397 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24398 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24399 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24400 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24401 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24402 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24403
24404 /* SSE */
24405 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
24406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24407 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24408 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24409 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24410 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24411 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24412 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24413 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24414 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24416 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24417
24418 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24419
24420 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24421 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24422 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24423 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24424 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24425 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24426 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24427 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24428
24429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24431 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24433 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24434 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24435 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24436 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24437 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24438 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24439 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
24440 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24441 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24442 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24443 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24444 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24445 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24446 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24447 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24448 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24449 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24450 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24451
24452 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24453 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24454 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24456
24457 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24458 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24459 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24460 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24461
24462 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24463
24464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24467 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24468 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24469
24470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
24471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
24472 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
24473
24474 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
24475
24476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24477 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24478 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24479
24480 /* SSE MMX or 3DNow!A */
24481 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24482 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24483 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24484
24485 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24486 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24487 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24488 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24489
24490 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
24491 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
24492
24493 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
24494
24495 /* SSE2 */
24496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24497
24498 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
24499 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
24500 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
24501 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
24502 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
24503 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24504 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
24505 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
24506 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
24507 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
24508 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
24509 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
24510
24511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
24512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
24513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
24514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
24515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24517
24518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
24521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24523
24524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
24525
24526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24528 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24529 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24530
24531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
24533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24534
24535 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24536 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24537 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24538 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24543
24544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
24549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24564
24565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24566 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24569
24570 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24572 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24573 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24574
24575 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24576
24577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24578 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24579 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24580
24581 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
24582
24583 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24584 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24585 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24586 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24587 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24588 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24589 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24590 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24591
24592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24600
24601 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24602 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24603
24604 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24606 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24607 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24608
24609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24611
24612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24617 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24618
24619 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24620 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24621 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24623
24624 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24625 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24626 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24627 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24628 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24629 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24630 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24631 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24632
24633 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
24635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24636
24637 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
24639
24640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
24641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
24642
24643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
24644
24645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
24646 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
24647 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
24648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
24649
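/* Shift builtins.  The *_SI_COUNT types take an integer shift count and
   the *_V*_COUNT types take the count in a vector register;
   pslldqi128/psrldqi128 shift the full 128-bit value, so they expand
   through the V1TI shift patterns and the INT_CONVERT types cover the
   V2DI <-> V1TI mode change.  */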
24650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24651 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24652 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24653 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
24654 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24655 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24656 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
24657
24658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24659 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24660 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24661 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
24662 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24663 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24664 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
24665
24666 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24667 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24668 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24669 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24670
24671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
24672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24674
24675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
24676
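/* Entries with a null name (FABSQ/COPYSIGNQ below, and the AES and
   PCLMUL entries further down) are skipped when builtins are declared
   from this table; they are declared separately with their own ISA
   requirements, and only their expansion data is taken from here.  */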
24677 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
24678 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
24679
24680 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
24681
24682 /* SSE2 MMX */
24683 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
24684 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
24685
24686 /* SSE3 */
24687 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24688 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24689
24690 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24691 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24692 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24693 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24694 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24695 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24696
24697 /* SSSE3 */
24698 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
24699 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
24700 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
24701 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
24702 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
24703 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24704
24705 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24706 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24707 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24708 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24709 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24710 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24711 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24712 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24713 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24714 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24715 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24716 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24717 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
24718 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
24719 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24720 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24721 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24722 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24723 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24724 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24725 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24726 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24727 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24728 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24729
24730 /* SSSE3. */
24731 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
24732 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
24733
24734 /* SSE4.1 */
24735 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24736 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24737 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
24738 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
24739 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24740 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24741 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24742 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
24743 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24744 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
24745
24746 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
24747 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
24748 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
24749 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
24750 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
24751 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
24752 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
24753 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
24754 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
24755 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
24756 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
24757 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
24758 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
24759
24760 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
24761 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24762 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24763 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24764 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24765 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24766 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24767 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24768 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24769 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24770 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
24771 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24772
24773 /* SSE4.1 */
24774 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
24775 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
24776 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24777 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24778
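/* roundpd/roundps with a fixed rounding mode: the ROUND_* constant is
   carried in the otherwise unused comparison slot and becomes the
   immediate operand when the builtin is expanded.  */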
24779 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
24780 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
24781 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
24782 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
24783
24784 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
24785 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
24786 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
24787 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
24788
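/* PTEST sets ZF from the AND of its operands and CF from the ANDN; the
   rtx_code selects which flag the builtin reads: EQ for ZF (ptestz),
   LTU for CF (ptestc), GTU for CF == 0 && ZF == 0 (ptestnzc).  */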
24789 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
24790 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
24791 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
24792
24793 /* SSE4.2 */
24794 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24795 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
24796 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
24797 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
24798 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
24799
24800 /* SSE4A */
24801 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
24802 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
24803 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
24804 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24805
24806 /* AES */
24807 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
24808 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
24809
24810 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24811 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24812 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24813 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24814
24815 /* PCLMUL */
24816 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
24817
24818 /* AVX */
24819 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24820 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24823 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24824 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24827 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24833 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24834 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24835 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24836 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24837 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24838 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24839 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24840 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24841 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24842 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24843 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24844 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24845
24846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
24847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
24848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
24849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
24850
24851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
24854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
24855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24860 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24862 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
24865 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
24866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
24867 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
24868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
24869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
24870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
24871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
24872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
24873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
24874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
24875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
24878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
24879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
24880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
24881 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
24882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
24883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
24884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
24885
24886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
24889
24890 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
24891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24892 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24894 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24895
24896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24897
24898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
24899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
24900
24901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
24902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
24903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
24904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
24905
24906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
24907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
24908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
24909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
24910
24911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24915
24916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
24917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
24918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
24919 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
24920 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
24921 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
24922
24923 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
24924 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
24925 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
24926 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
24927 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
24928 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
24929 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
24930 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
24931 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
24932 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
24933 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
24934 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
24935 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
24936 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
24937 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
24938
24939 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
24940 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
24941
24942 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24943 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24944
24945 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
24946
24947 /* BMI */
24948 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
24949 { OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
24950 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
24951
24952 /* TBM */
24953 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
24954 { OPTION_MASK_ISA_TBM | OPTION_MASK_ISA_64BIT, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
24955
24956 /* F16C */
24957 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
24958 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
24959 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
24960 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
24961 };
24962
24963 /* FMA4 and XOP. */
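/* The MULTI_ARG_* names below abbreviate the function-type enums they
   expand to: the digit is the number of vector operands, the remaining
   tokens encode the element modes, and a trailing 2 on the 1- and
   3-operand forms denotes the 256-bit variant.  */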
24964 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
24965 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
24966 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
24967 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
24968 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
24969 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
24970 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
24971 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
24972 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
24973 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
24974 #define MULTI_ARG_3_SI_DI V2DI_FTYPE_V4SI_V4SI_V2DI
24975 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
24976 #define MULTI_ARG_3_HI_SI V4SI_FTYPE_V8HI_V8HI_V4SI
24977 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
24978 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
24979 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
24980 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
24981 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
24982 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
24983 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
24984 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
24985 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
24986 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
24987 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
24988 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
24989 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
24990 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
24991 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
24992 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
24993 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
24994 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
24995 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
24996 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
24997 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
24998 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
24999 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
25000 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
25001 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
25002 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
25003 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
25004 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
25005 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
25006 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
25007 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
25008 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
25009 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
25010 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
25011 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
25012 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
25013 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
25014 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
25015 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
25016
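/* Each entry: ISA mask, insn code, builtin name, builtin enum, rtx_code
   sub-code, and the MULTI_ARG_* function type defined above.  */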
25017 static const struct builtin_description bdesc_multi_arg[] =
25018 {
25019 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
25020 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
25021 UNKNOWN, (int)MULTI_ARG_3_SF },
25022 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
25023 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
25024 UNKNOWN, (int)MULTI_ARG_3_DF },
25025
25026 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
25027 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
25028 UNKNOWN, (int)MULTI_ARG_3_SF },
25029 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
25030 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
25031 UNKNOWN, (int)MULTI_ARG_3_DF },
25032 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
25033 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
25034 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25035 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
25036 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
25037 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25038
25039 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
25040 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
25041 UNKNOWN, (int)MULTI_ARG_3_SF },
25042 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
25043 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
25044 UNKNOWN, (int)MULTI_ARG_3_DF },
25045 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
25046 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
25047 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25048 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
25049 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
25050 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25051
25052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
25053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
25054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
25055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
25056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
25057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
25058 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
25059
25060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
25063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
25064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
25065 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
25066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
25067
25068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
25069
25070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25075 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25078 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25081 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25082
25083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
25085 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
25086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
25087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
25088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
25089 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
25090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
25091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
25093 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
25094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
25095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
25097 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
25098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
25099
25100 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
25101 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
25102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
25103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
25104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
25105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
25106
25107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
25109 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
25110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
25112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25113 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
25115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
25116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
25118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25122
25123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
25124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
25127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
25128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
25129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
25130
25131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
25132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25133 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
25135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
25136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
25137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
25138
25139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
25140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
25143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
25144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
25145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
25146
25147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
25151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
25152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
25153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
25154
25155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
25156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
25159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
25160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
25161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
25162
25163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
25164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25165 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
25167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
25168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
25169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
25170
25171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
25172 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25174 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
25175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
25176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
25177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
25178
25179 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25180 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
25183 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
25184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
25185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
25186
25187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25195
25196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25204
25205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
25209
25210 };
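
/* Illustrative note: each bdesc_multi_arg row gives the ISA mask that must
   be enabled, the insn pattern used for expansion, the user-visible builtin
   name, its IX86_BUILTIN_* code, an optional comparison or sub code, and
   the ix86_builtin_func_type describing the signature.  For example the row

     { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,
       "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ,
       (int)MULTI_ARG_2_QI_CMP }

   registers __builtin_ia32_vpcomeqb as a two-operand byte comparison that
   expands through xop_maskcmpv16qi3 with EQ as its comparison code.  */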
25211
25212 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
25213 not in the current target ISA, so that the user can compile particular
25214 modules with target-specific options that differ from the command-line
25215 options. */
25216 static void
25217 ix86_init_mmx_sse_builtins (void)
25218 {
25219 const struct builtin_description * d;
25220 enum ix86_builtin_func_type ftype;
25221 size_t i;
25222
25223 /* Add all special builtins with variable number of operands. */
25224 for (i = 0, d = bdesc_special_args;
25225 i < ARRAY_SIZE (bdesc_special_args);
25226 i++, d++)
25227 {
25228 if (d->name == 0)
25229 continue;
25230
25231 ftype = (enum ix86_builtin_func_type) d->flag;
25232 def_builtin (d->mask, d->name, ftype, d->code);
25233 }
25234
25235 /* Add all builtins with variable number of operands. */
25236 for (i = 0, d = bdesc_args;
25237 i < ARRAY_SIZE (bdesc_args);
25238 i++, d++)
25239 {
25240 if (d->name == 0)
25241 continue;
25242
25243 ftype = (enum ix86_builtin_func_type) d->flag;
25244 def_builtin_const (d->mask, d->name, ftype, d->code);
25245 }
25246
25247 /* pcmpestr[im] insns. */
25248 for (i = 0, d = bdesc_pcmpestr;
25249 i < ARRAY_SIZE (bdesc_pcmpestr);
25250 i++, d++)
25251 {
25252 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25253 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25254 else
25255 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25256 def_builtin_const (d->mask, d->name, ftype, d->code);
25257 }
25258
25259 /* pcmpistr[im] insns. */
25260 for (i = 0, d = bdesc_pcmpistr;
25261 i < ARRAY_SIZE (bdesc_pcmpistr);
25262 i++, d++)
25263 {
25264 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25265 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25266 else
25267 ftype = INT_FTYPE_V16QI_V16QI_INT;
25268 def_builtin_const (d->mask, d->name, ftype, d->code);
25269 }
25270
25271 /* comi/ucomi insns. */
25272 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25273 {
25274 if (d->mask == OPTION_MASK_ISA_SSE2)
25275 ftype = INT_FTYPE_V2DF_V2DF;
25276 else
25277 ftype = INT_FTYPE_V4SF_V4SF;
25278 def_builtin_const (d->mask, d->name, ftype, d->code);
25279 }
25280
25281 /* SSE */
25282 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25283 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25284 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25285 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25286
25287 /* SSE or 3DNow!A */
25288 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25289 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25290 IX86_BUILTIN_MASKMOVQ);
25291
25292 /* SSE2 */
25293 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25294 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25295
25296 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25297 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25298 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25299 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25300
25301 /* SSE3. */
25302 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25303 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25304 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25305 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
25306
25307 /* AES */
25308 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25309 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25310 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25311 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25312 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25313 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25314 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25315 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25316 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25317 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25318 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25319 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25320
25321 /* PCLMUL */
25322 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25323 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25324
25325 /* RDRND */
25326 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25327 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25328 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25329 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25330 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25331 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25332 IX86_BUILTIN_RDRAND64_STEP);
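
/* A minimal usage sketch for the RDRND step builtins defined above
   (INT_FTYPE_PUNSIGNED and friends: each stores a random value through its
   pointer argument and returns nonzero on success):

     unsigned int val;
     if (__builtin_ia32_rdrand32_step (&val))
       consume (val);          // hypothetical consumer of the random value
     else
       handle_not_ready ();    // hypothetical fallback; the RNG was not ready

   The 16-bit and 64-bit variants behave the same on their pointer types.  */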
25333
25334 /* MMX access to the vec_init patterns. */
25335 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25336 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25337
25338 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25339 V4HI_FTYPE_HI_HI_HI_HI,
25340 IX86_BUILTIN_VEC_INIT_V4HI);
25341
25342 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25343 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25344 IX86_BUILTIN_VEC_INIT_V8QI);
25345
25346 /* Access to the vec_extract patterns. */
25347 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25348 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25349 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25350 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25351 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25352 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25353 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25354 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25355 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25356 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25357
25358 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25359 "__builtin_ia32_vec_ext_v4hi",
25360 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25361
25362 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25363 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25364
25365 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25366 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
25367
25368 /* Access to the vec_set patterns. */
25369 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25370 "__builtin_ia32_vec_set_v2di",
25371 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25372
25373 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25374 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25375
25376 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25377 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25378
25379 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25380 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25381
25382 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25383 "__builtin_ia32_vec_set_v4hi",
25384 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25385
25386 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25387 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
25388
25389 /* Add FMA4 multi-arg argument instructions */
25390 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25391 {
25392 if (d->name == 0)
25393 continue;
25394
25395 ftype = (enum ix86_builtin_func_type) d->flag;
25396 def_builtin_const (d->mask, d->name, ftype, d->code);
25397 }
25398 }
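
/* Because the loops above register builtins even when their ISA is not
   enabled on the command line, individual functions can still opt into an
   ISA via the target attribute.  Illustrative sketch using the AES builtin
   defined above:

     typedef long long v2di __attribute__ ((vector_size (16)));

     __attribute__ ((target ("aes")))
     v2di encrypt_round (v2di state, v2di key)
     {
       return __builtin_ia32_aesenc128 (state, key);
     }

   even if the rest of the file is compiled without -maes.  */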
25399
25400 /* Internal method for ix86_init_builtins. */
25401
25402 static void
25403 ix86_init_builtins_va_builtins_abi (void)
25404 {
25405 tree ms_va_ref, sysv_va_ref;
25406 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25407 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25408 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25409 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25410
25411 if (!TARGET_64BIT)
25412 return;
25413 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25414 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25415 ms_va_ref = build_reference_type (ms_va_list_type_node);
25416 sysv_va_ref =
25417 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25418
25419 fnvoid_va_end_ms =
25420 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25421 fnvoid_va_start_ms =
25422 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25423 fnvoid_va_end_sysv =
25424 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25425 fnvoid_va_start_sysv =
25426 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25427 NULL_TREE);
25428 fnvoid_va_copy_ms =
25429 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25430 NULL_TREE);
25431 fnvoid_va_copy_sysv =
25432 build_function_type_list (void_type_node, sysv_va_ref,
25433 sysv_va_ref, NULL_TREE);
25434
25435 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25436 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25437 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25438 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25439 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25440 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25441 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25442 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25443 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25444 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25445 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25446 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25447 }
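
/* Illustrative sketch of the ABI-specific va builtins registered above;
   they mirror the standard va_start/va_end/va_copy but operate on the
   64-bit MS and SysV variants of va_list:

     __attribute__ ((ms_abi)) void
     f (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       // ... walk the arguments ...
       __builtin_ms_va_end (ap);
     }

   The __builtin_sysv_va_* entry points provide the same operations for
   sysv_abi functions.  */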
25448
25449 static void
25450 ix86_init_builtin_types (void)
25451 {
25452 tree float128_type_node, float80_type_node;
25453
25454 /* The __float80 type. */
25455 float80_type_node = long_double_type_node;
25456 if (TYPE_MODE (float80_type_node) != XFmode)
25457 {
25458 /* The __float80 type. */
25459 float80_type_node = make_node (REAL_TYPE);
25460
25461 TYPE_PRECISION (float80_type_node) = 80;
25462 layout_type (float80_type_node);
25463 }
25464 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25465
25466 /* The __float128 type. */
25467 float128_type_node = make_node (REAL_TYPE);
25468 TYPE_PRECISION (float128_type_node) = 128;
25469 layout_type (float128_type_node);
25470 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25471
25472 /* This macro is built by i386-builtin-types.awk. */
25473 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25474 }
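
/* The two type names registered above are directly usable in user code,
   for example (illustrative):

     __float80  e;   // 80-bit x87 extended precision, XFmode
     __float128 q;   // 128-bit quad precision, TFmode

   __float80 is simply long double when long double already has XFmode.  */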
25475
25476 static void
25477 ix86_init_builtins (void)
25478 {
25479 tree t;
25480
25481 ix86_init_builtin_types ();
25482
25483 /* TFmode support builtins. */
25484 def_builtin_const (0, "__builtin_infq",
25485 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25486 def_builtin_const (0, "__builtin_huge_valq",
25487 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25488
25489 /* We will expand them to a normal call if SSE2 isn't available, since
25490 they are used by libgcc. */
25491 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25492 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25493 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25494 TREE_READONLY (t) = 1;
25495 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25496
25497 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25498 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25499 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25500 TREE_READONLY (t) = 1;
25501 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
25502
25503 ix86_init_mmx_sse_builtins ();
25504
25505 if (TARGET_LP64)
25506 ix86_init_builtins_va_builtins_abi ();
25507
25508 #ifdef SUBTARGET_INIT_BUILTINS
25509 SUBTARGET_INIT_BUILTINS;
25510 #endif
25511 }
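
/* Illustrative use of the TFmode builtins defined above; for some
   __float128 value v:

     __float128 inf = __builtin_infq ();
     __float128 mag = __builtin_fabsq (v);          // |v|
     __float128 sgn = __builtin_copysignq (mag, v); // |v| with v's sign

   __builtin_fabsq and __builtin_copysignq fall back to the libgcc
   routines __fabstf2 and __copysigntf3 when not expanded inline.  */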
25512
25513 /* Return the ix86 builtin for CODE. */
25514
25515 static tree
25516 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25517 {
25518 if (code >= IX86_BUILTIN_MAX)
25519 return error_mark_node;
25520
25521 return ix86_builtins[code];
25522 }
25523
25524 /* Errors in the source file can cause expand_expr to return const0_rtx
25525 where we expect a vector. To avoid crashing, use one of the vector
25526 clear instructions. */
25527 static rtx
25528 safe_vector_operand (rtx x, enum machine_mode mode)
25529 {
25530 if (x == const0_rtx)
25531 x = CONST0_RTX (mode);
25532 return x;
25533 }
25534
25535 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25536
25537 static rtx
25538 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25539 {
25540 rtx pat;
25541 tree arg0 = CALL_EXPR_ARG (exp, 0);
25542 tree arg1 = CALL_EXPR_ARG (exp, 1);
25543 rtx op0 = expand_normal (arg0);
25544 rtx op1 = expand_normal (arg1);
25545 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25546 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25547 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25548
25549 if (VECTOR_MODE_P (mode0))
25550 op0 = safe_vector_operand (op0, mode0);
25551 if (VECTOR_MODE_P (mode1))
25552 op1 = safe_vector_operand (op1, mode1);
25553
25554 if (optimize || !target
25555 || GET_MODE (target) != tmode
25556 || !insn_data[icode].operand[0].predicate (target, tmode))
25557 target = gen_reg_rtx (tmode);
25558
25559 if (GET_MODE (op1) == SImode && mode1 == TImode)
25560 {
25561 rtx x = gen_reg_rtx (V4SImode);
25562 emit_insn (gen_sse2_loadd (x, op1));
25563 op1 = gen_lowpart (TImode, x);
25564 }
25565
25566 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25567 op0 = copy_to_mode_reg (mode0, op0);
25568 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25569 op1 = copy_to_mode_reg (mode1, op1);
25570
25571 pat = GEN_FCN (icode) (target, op0, op1);
25572 if (! pat)
25573 return 0;
25574
25575 emit_insn (pat);
25576
25577 return target;
25578 }
25579
25580 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25581
25582 static rtx
25583 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25584 enum ix86_builtin_func_type m_type,
25585 enum rtx_code sub_code)
25586 {
25587 rtx pat;
25588 int i;
25589 int nargs;
25590 bool comparison_p = false;
25591 bool tf_p = false;
25592 bool last_arg_constant = false;
25593 int num_memory = 0;
25594 struct {
25595 rtx op;
25596 enum machine_mode mode;
25597 } args[4];
25598
25599 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25600
25601 switch (m_type)
25602 {
25603 case MULTI_ARG_4_DF2_DI_I:
25604 case MULTI_ARG_4_DF2_DI_I1:
25605 case MULTI_ARG_4_SF2_SI_I:
25606 case MULTI_ARG_4_SF2_SI_I1:
25607 nargs = 4;
25608 last_arg_constant = true;
25609 break;
25610
25611 case MULTI_ARG_3_SF:
25612 case MULTI_ARG_3_DF:
25613 case MULTI_ARG_3_SF2:
25614 case MULTI_ARG_3_DF2:
25615 case MULTI_ARG_3_DI:
25616 case MULTI_ARG_3_SI:
25617 case MULTI_ARG_3_SI_DI:
25618 case MULTI_ARG_3_HI:
25619 case MULTI_ARG_3_HI_SI:
25620 case MULTI_ARG_3_QI:
25621 case MULTI_ARG_3_DI2:
25622 case MULTI_ARG_3_SI2:
25623 case MULTI_ARG_3_HI2:
25624 case MULTI_ARG_3_QI2:
25625 nargs = 3;
25626 break;
25627
25628 case MULTI_ARG_2_SF:
25629 case MULTI_ARG_2_DF:
25630 case MULTI_ARG_2_DI:
25631 case MULTI_ARG_2_SI:
25632 case MULTI_ARG_2_HI:
25633 case MULTI_ARG_2_QI:
25634 nargs = 2;
25635 break;
25636
25637 case MULTI_ARG_2_DI_IMM:
25638 case MULTI_ARG_2_SI_IMM:
25639 case MULTI_ARG_2_HI_IMM:
25640 case MULTI_ARG_2_QI_IMM:
25641 nargs = 2;
25642 last_arg_constant = true;
25643 break;
25644
25645 case MULTI_ARG_1_SF:
25646 case MULTI_ARG_1_DF:
25647 case MULTI_ARG_1_SF2:
25648 case MULTI_ARG_1_DF2:
25649 case MULTI_ARG_1_DI:
25650 case MULTI_ARG_1_SI:
25651 case MULTI_ARG_1_HI:
25652 case MULTI_ARG_1_QI:
25653 case MULTI_ARG_1_SI_DI:
25654 case MULTI_ARG_1_HI_DI:
25655 case MULTI_ARG_1_HI_SI:
25656 case MULTI_ARG_1_QI_DI:
25657 case MULTI_ARG_1_QI_SI:
25658 case MULTI_ARG_1_QI_HI:
25659 nargs = 1;
25660 break;
25661
25662 case MULTI_ARG_2_DI_CMP:
25663 case MULTI_ARG_2_SI_CMP:
25664 case MULTI_ARG_2_HI_CMP:
25665 case MULTI_ARG_2_QI_CMP:
25666 nargs = 2;
25667 comparison_p = true;
25668 break;
25669
25670 case MULTI_ARG_2_SF_TF:
25671 case MULTI_ARG_2_DF_TF:
25672 case MULTI_ARG_2_DI_TF:
25673 case MULTI_ARG_2_SI_TF:
25674 case MULTI_ARG_2_HI_TF:
25675 case MULTI_ARG_2_QI_TF:
25676 nargs = 2;
25677 tf_p = true;
25678 break;
25679
25680 default:
25681 gcc_unreachable ();
25682 }
25683
25684 if (optimize || !target
25685 || GET_MODE (target) != tmode
25686 || !insn_data[icode].operand[0].predicate (target, tmode))
25687 target = gen_reg_rtx (tmode);
25688
25689 gcc_assert (nargs <= 4);
25690
25691 for (i = 0; i < nargs; i++)
25692 {
25693 tree arg = CALL_EXPR_ARG (exp, i);
25694 rtx op = expand_normal (arg);
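/* When expanding a comparison, the insn's first input operand is the
   comparison rtx itself (see the nargs == 2 case below), so the call
   arguments map to insn operands one slot further on.  */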
25695 int adjust = (comparison_p) ? 1 : 0;
25696 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
25697
25698 if (last_arg_constant && i == nargs - 1)
25699 {
25700 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
25701 {
25702 enum insn_code new_icode = icode;
25703 switch (icode)
25704 {
25705 case CODE_FOR_xop_vpermil2v2df3:
25706 case CODE_FOR_xop_vpermil2v4sf3:
25707 case CODE_FOR_xop_vpermil2v4df3:
25708 case CODE_FOR_xop_vpermil2v8sf3:
25709 error ("the last argument must be a 2-bit immediate");
25710 return gen_reg_rtx (tmode);
25711 case CODE_FOR_xop_rotlv2di3:
25712 new_icode = CODE_FOR_rotlv2di3;
25713 goto xop_rotl;
25714 case CODE_FOR_xop_rotlv4si3:
25715 new_icode = CODE_FOR_rotlv4si3;
25716 goto xop_rotl;
25717 case CODE_FOR_xop_rotlv8hi3:
25718 new_icode = CODE_FOR_rotlv8hi3;
25719 goto xop_rotl;
25720 case CODE_FOR_xop_rotlv16qi3:
25721 new_icode = CODE_FOR_rotlv16qi3;
25722 xop_rotl:
25723 if (CONST_INT_P (op))
25724 {
25725 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
25726 op = GEN_INT (INTVAL (op) & mask);
25727 gcc_checking_assert
25728 (insn_data[icode].operand[i + 1].predicate (op, mode));
25729 }
25730 else
25731 {
25732 gcc_checking_assert
25733 (nargs == 2
25734 && insn_data[new_icode].operand[0].mode == tmode
25735 && insn_data[new_icode].operand[1].mode == tmode
25736 && insn_data[new_icode].operand[2].mode == mode
25737 && insn_data[new_icode].operand[0].predicate
25738 == insn_data[icode].operand[0].predicate
25739 && insn_data[new_icode].operand[1].predicate
25740 == insn_data[icode].operand[1].predicate);
25741 icode = new_icode;
25742 goto non_constant;
25743 }
25744 break;
25745 default:
25746 gcc_unreachable ();
25747 }
25748 }
25749 }
25750 else
25751 {
25752 non_constant:
25753 if (VECTOR_MODE_P (mode))
25754 op = safe_vector_operand (op, mode);
25755
25756 /* If we aren't optimizing, only allow one memory operand to be
25757 generated. */
25758 if (memory_operand (op, mode))
25759 num_memory++;
25760
25761 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
25762
25763 if (optimize
25764 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
25765 || num_memory > 1)
25766 op = force_reg (mode, op);
25767 }
25768
25769 args[i].op = op;
25770 args[i].mode = mode;
25771 }
25772
25773 switch (nargs)
25774 {
25775 case 1:
25776 pat = GEN_FCN (icode) (target, args[0].op);
25777 break;
25778
25779 case 2:
25780 if (tf_p)
25781 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
25782 GEN_INT ((int)sub_code));
25783 else if (! comparison_p)
25784 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25785 else
25786 {
25787 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
25788 args[0].op,
25789 args[1].op);
25790
25791 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
25792 }
25793 break;
25794
25795 case 3:
25796 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25797 break;
25798
25799 case 4:
25800 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
25801 break;
25802
25803 default:
25804 gcc_unreachable ();
25805 }
25806
25807 if (! pat)
25808 return 0;
25809
25810 emit_insn (pat);
25811 return target;
25812 }
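
/* Illustrative sketch of the immediate-rotate handling above: with x a
   128-bit vector of ints (e.g. __v4si), a call such as

     __v4si r = __builtin_ia32_vprotdi (x, 5);   // rotate each element by 5

   keeps the xop pattern, masking the constant to the element width if
   needed, while a non-constant count makes the expander fall back to the
   generic CODE_FOR_rotlv4si3 pattern so the count can live in a register.  */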
25813
25814 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
25815 insns with vec_merge. */
25816
25817 static rtx
25818 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
25819 rtx target)
25820 {
25821 rtx pat;
25822 tree arg0 = CALL_EXPR_ARG (exp, 0);
25823 rtx op1, op0 = expand_normal (arg0);
25824 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25825 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25826
25827 if (optimize || !target
25828 || GET_MODE (target) != tmode
25829 || !insn_data[icode].operand[0].predicate (target, tmode))
25830 target = gen_reg_rtx (tmode);
25831
25832 if (VECTOR_MODE_P (mode0))
25833 op0 = safe_vector_operand (op0, mode0);
25834
25835 if ((optimize && !register_operand (op0, mode0))
25836 || !insn_data[icode].operand[1].predicate (op0, mode0))
25837 op0 = copy_to_mode_reg (mode0, op0);
25838
25839 op1 = op0;
25840 if (!insn_data[icode].operand[2].predicate (op1, mode0))
25841 op1 = copy_to_mode_reg (mode0, op1);
25842
25843 pat = GEN_FCN (icode) (target, op0, op1);
25844 if (! pat)
25845 return 0;
25846 emit_insn (pat);
25847 return target;
25848 }
25849
25850 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
25851
25852 static rtx
25853 ix86_expand_sse_compare (const struct builtin_description *d,
25854 tree exp, rtx target, bool swap)
25855 {
25856 rtx pat;
25857 tree arg0 = CALL_EXPR_ARG (exp, 0);
25858 tree arg1 = CALL_EXPR_ARG (exp, 1);
25859 rtx op0 = expand_normal (arg0);
25860 rtx op1 = expand_normal (arg1);
25861 rtx op2;
25862 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25863 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25864 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
25865 enum rtx_code comparison = d->comparison;
25866
25867 if (VECTOR_MODE_P (mode0))
25868 op0 = safe_vector_operand (op0, mode0);
25869 if (VECTOR_MODE_P (mode1))
25870 op1 = safe_vector_operand (op1, mode1);
25871
25872 /* Swap operands if we have a comparison that isn't available in
25873 hardware. */
25874 if (swap)
25875 {
25876 rtx tmp = gen_reg_rtx (mode1);
25877 emit_move_insn (tmp, op1);
25878 op1 = op0;
25879 op0 = tmp;
25880 }
25881
25882 if (optimize || !target
25883 || GET_MODE (target) != tmode
25884 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25885 target = gen_reg_rtx (tmode);
25886
25887 if ((optimize && !register_operand (op0, mode0))
25888 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
25889 op0 = copy_to_mode_reg (mode0, op0);
25890 if ((optimize && !register_operand (op1, mode1))
25891 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
25892 op1 = copy_to_mode_reg (mode1, op1);
25893
25894 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
25895 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
25896 if (! pat)
25897 return 0;
25898 emit_insn (pat);
25899 return target;
25900 }
25901
25902 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
25903
25904 static rtx
25905 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
25906 rtx target)
25907 {
25908 rtx pat;
25909 tree arg0 = CALL_EXPR_ARG (exp, 0);
25910 tree arg1 = CALL_EXPR_ARG (exp, 1);
25911 rtx op0 = expand_normal (arg0);
25912 rtx op1 = expand_normal (arg1);
25913 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25914 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25915 enum rtx_code comparison = d->comparison;
25916
25917 if (VECTOR_MODE_P (mode0))
25918 op0 = safe_vector_operand (op0, mode0);
25919 if (VECTOR_MODE_P (mode1))
25920 op1 = safe_vector_operand (op1, mode1);
25921
25922 /* Swap operands if we have a comparison that isn't available in
25923 hardware. */
25924 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
25925 {
25926 rtx tmp = op1;
25927 op1 = op0;
25928 op0 = tmp;
25929 }
25930
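/* The comi/ucomi patterns only set the flags register; build the integer
   result by clearing an SImode register and then setting its low byte
   from the flags comparison emitted after the pattern below.  */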
25931 target = gen_reg_rtx (SImode);
25932 emit_move_insn (target, const0_rtx);
25933 target = gen_rtx_SUBREG (QImode, target, 0);
25934
25935 if ((optimize && !register_operand (op0, mode0))
25936 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25937 op0 = copy_to_mode_reg (mode0, op0);
25938 if ((optimize && !register_operand (op1, mode1))
25939 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
25940 op1 = copy_to_mode_reg (mode1, op1);
25941
25942 pat = GEN_FCN (d->icode) (op0, op1);
25943 if (! pat)
25944 return 0;
25945 emit_insn (pat);
25946 emit_insn (gen_rtx_SET (VOIDmode,
25947 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25948 gen_rtx_fmt_ee (comparison, QImode,
25949 SET_DEST (pat),
25950 const0_rtx)));
25951
25952 return SUBREG_REG (target);
25953 }
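
/* Illustrative use of a comi descriptor expanded by the routine above:
   with __v4sf operands a and b,

     int lt = __builtin_ia32_comilt (a, b);

   yields 1 when the low elements compare less-than, via the flags
   sequence built in ix86_expand_sse_comi.  */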
25954
25955 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
25956
25957 static rtx
25958 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
25959 rtx target)
25960 {
25961 rtx pat;
25962 tree arg0 = CALL_EXPR_ARG (exp, 0);
25963 rtx op1, op0 = expand_normal (arg0);
25964 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25965 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25966
25967 if (optimize || target == 0
25968 || GET_MODE (target) != tmode
25969 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25970 target = gen_reg_rtx (tmode);
25971
25972 if (VECTOR_MODE_P (mode0))
25973 op0 = safe_vector_operand (op0, mode0);
25974
25975 if ((optimize && !register_operand (op0, mode0))
25976 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25977 op0 = copy_to_mode_reg (mode0, op0);
25978
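/* For the round builtins the extra immediate operand, i.e. the rounding
   mode, is stored in the comparison field of the descriptor.  */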
25979 op1 = GEN_INT (d->comparison);
25980
25981 pat = GEN_FCN (d->icode) (target, op0, op1);
25982 if (! pat)
25983 return 0;
25984 emit_insn (pat);
25985 return target;
25986 }
25987
25988 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
25989
25990 static rtx
25991 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
25992 rtx target)
25993 {
25994 rtx pat;
25995 tree arg0 = CALL_EXPR_ARG (exp, 0);
25996 tree arg1 = CALL_EXPR_ARG (exp, 1);
25997 rtx op0 = expand_normal (arg0);
25998 rtx op1 = expand_normal (arg1);
25999 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26000 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26001 enum rtx_code comparison = d->comparison;
26002
26003 if (VECTOR_MODE_P (mode0))
26004 op0 = safe_vector_operand (op0, mode0);
26005 if (VECTOR_MODE_P (mode1))
26006 op1 = safe_vector_operand (op1, mode1);
26007
26008 target = gen_reg_rtx (SImode);
26009 emit_move_insn (target, const0_rtx);
26010 target = gen_rtx_SUBREG (QImode, target, 0);
26011
26012 if ((optimize && !register_operand (op0, mode0))
26013 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26014 op0 = copy_to_mode_reg (mode0, op0);
26015 if ((optimize && !register_operand (op1, mode1))
26016 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26017 op1 = copy_to_mode_reg (mode1, op1);
26018
26019 pat = GEN_FCN (d->icode) (op0, op1);
26020 if (! pat)
26021 return 0;
26022 emit_insn (pat);
26023 emit_insn (gen_rtx_SET (VOIDmode,
26024 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26025 gen_rtx_fmt_ee (comparison, QImode,
26026 SET_DEST (pat),
26027 const0_rtx)));
26028
26029 return SUBREG_REG (target);
26030 }
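
/* Illustrative use of a ptest builtin expanded by the routine above:
   with __v2di operands a and b,

     int all_zero = __builtin_ia32_ptestz128 (a, b);

   returns nonzero when (a & b) has no bits set, i.e. the ZF result of
   the ptest instruction, materialized through the flags sequence above.  */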
26031
26032 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26033
26034 static rtx
26035 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26036 tree exp, rtx target)
26037 {
26038 rtx pat;
26039 tree arg0 = CALL_EXPR_ARG (exp, 0);
26040 tree arg1 = CALL_EXPR_ARG (exp, 1);
26041 tree arg2 = CALL_EXPR_ARG (exp, 2);
26042 tree arg3 = CALL_EXPR_ARG (exp, 3);
26043 tree arg4 = CALL_EXPR_ARG (exp, 4);
26044 rtx scratch0, scratch1;
26045 rtx op0 = expand_normal (arg0);
26046 rtx op1 = expand_normal (arg1);
26047 rtx op2 = expand_normal (arg2);
26048 rtx op3 = expand_normal (arg3);
26049 rtx op4 = expand_normal (arg4);
26050 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26051
26052 tmode0 = insn_data[d->icode].operand[0].mode;
26053 tmode1 = insn_data[d->icode].operand[1].mode;
26054 modev2 = insn_data[d->icode].operand[2].mode;
26055 modei3 = insn_data[d->icode].operand[3].mode;
26056 modev4 = insn_data[d->icode].operand[4].mode;
26057 modei5 = insn_data[d->icode].operand[5].mode;
26058 modeimm = insn_data[d->icode].operand[6].mode;
26059
26060 if (VECTOR_MODE_P (modev2))
26061 op0 = safe_vector_operand (op0, modev2);
26062 if (VECTOR_MODE_P (modev4))
26063 op2 = safe_vector_operand (op2, modev4);
26064
26065 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26066 op0 = copy_to_mode_reg (modev2, op0);
26067 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26068 op1 = copy_to_mode_reg (modei3, op1);
26069 if ((optimize && !register_operand (op2, modev4))
26070 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26071 op2 = copy_to_mode_reg (modev4, op2);
26072 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26073 op3 = copy_to_mode_reg (modei5, op3);
26074
26075 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26076 {
26077 error ("the fifth argument must be an 8-bit immediate");
26078 return const0_rtx;
26079 }
26080
26081 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26082 {
26083 if (optimize || !target
26084 || GET_MODE (target) != tmode0
26085 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26086 target = gen_reg_rtx (tmode0);
26087
26088 scratch1 = gen_reg_rtx (tmode1);
26089
26090 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26091 }
26092 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26093 {
26094 if (optimize || !target
26095 || GET_MODE (target) != tmode1
26096 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26097 target = gen_reg_rtx (tmode1);
26098
26099 scratch0 = gen_reg_rtx (tmode0);
26100
26101 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26102 }
26103 else
26104 {
26105 gcc_assert (d->flag);
26106
26107 scratch0 = gen_reg_rtx (tmode0);
26108 scratch1 = gen_reg_rtx (tmode1);
26109
26110 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26111 }
26112
26113 if (! pat)
26114 return 0;
26115
26116 emit_insn (pat);
26117
26118 if (d->flag)
26119 {
26120 target = gen_reg_rtx (SImode);
26121 emit_move_insn (target, const0_rtx);
26122 target = gen_rtx_SUBREG (QImode, target, 0);
26123
26124 emit_insn
26125 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26126 gen_rtx_fmt_ee (EQ, QImode,
26127 gen_rtx_REG ((enum machine_mode) d->flag,
26128 FLAGS_REG),
26129 const0_rtx)));
26130 return SUBREG_REG (target);
26131 }
26132 else
26133 return target;
26134 }
26135
26136
26137 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26138
26139 static rtx
26140 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26141 tree exp, rtx target)
26142 {
26143 rtx pat;
26144 tree arg0 = CALL_EXPR_ARG (exp, 0);
26145 tree arg1 = CALL_EXPR_ARG (exp, 1);
26146 tree arg2 = CALL_EXPR_ARG (exp, 2);
26147 rtx scratch0, scratch1;
26148 rtx op0 = expand_normal (arg0);
26149 rtx op1 = expand_normal (arg1);
26150 rtx op2 = expand_normal (arg2);
26151 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26152
26153 tmode0 = insn_data[d->icode].operand[0].mode;
26154 tmode1 = insn_data[d->icode].operand[1].mode;
26155 modev2 = insn_data[d->icode].operand[2].mode;
26156 modev3 = insn_data[d->icode].operand[3].mode;
26157 modeimm = insn_data[d->icode].operand[4].mode;
26158
26159 if (VECTOR_MODE_P (modev2))
26160 op0 = safe_vector_operand (op0, modev2);
26161 if (VECTOR_MODE_P (modev3))
26162 op1 = safe_vector_operand (op1, modev3);
26163
26164 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26165 op0 = copy_to_mode_reg (modev2, op0);
26166 if ((optimize && !register_operand (op1, modev3))
26167 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26168 op1 = copy_to_mode_reg (modev3, op1);
26169
26170 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26171 {
26172 error ("the third argument must be an 8-bit immediate");
26173 return const0_rtx;
26174 }
26175
26176 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26177 {
26178 if (optimize || !target
26179 || GET_MODE (target) != tmode0
26180 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26181 target = gen_reg_rtx (tmode0);
26182
26183 scratch1 = gen_reg_rtx (tmode1);
26184
26185 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26186 }
26187 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26188 {
26189 if (optimize || !target
26190 || GET_MODE (target) != tmode1
26191 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26192 target = gen_reg_rtx (tmode1);
26193
26194 scratch0 = gen_reg_rtx (tmode0);
26195
26196 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26197 }
26198 else
26199 {
26200 gcc_assert (d->flag);
26201
26202 scratch0 = gen_reg_rtx (tmode0);
26203 scratch1 = gen_reg_rtx (tmode1);
26204
26205 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26206 }
26207
26208 if (! pat)
26209 return 0;
26210
26211 emit_insn (pat);
26212
26213 if (d->flag)
26214 {
26215 target = gen_reg_rtx (SImode);
26216 emit_move_insn (target, const0_rtx);
26217 target = gen_rtx_SUBREG (QImode, target, 0);
26218
26219 emit_insn
26220 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26221 gen_rtx_fmt_ee (EQ, QImode,
26222 gen_rtx_REG ((enum machine_mode) d->flag,
26223 FLAGS_REG),
26224 const0_rtx)));
26225 return SUBREG_REG (target);
26226 }
26227 else
26228 return target;
26229 }
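
/* Illustrative call of a pcmpistr builtin expanded by the routine above;
   the last argument must be a literal 8-bit control byte, as enforced by
   the immediate check:

     int idx = __builtin_ia32_pcmpistri128 (a, b, 0x0c);

   where a and b are __v16qi string fragments and 0x0c is one possible
   control value (equal-ordered comparison of unsigned bytes); see the
   SSE4.2 documentation for the full encoding.  */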
26230
26231 /* Subroutine of ix86_expand_builtin to take care of insns with
26232 variable number of operands. */
26233
26234 static rtx
26235 ix86_expand_args_builtin (const struct builtin_description *d,
26236 tree exp, rtx target)
26237 {
26238 rtx pat, real_target;
26239 unsigned int i, nargs;
26240 unsigned int nargs_constant = 0;
26241 int num_memory = 0;
26242 struct
26243 {
26244 rtx op;
26245 enum machine_mode mode;
26246 } args[4];
26247 bool last_arg_count = false;
26248 enum insn_code icode = d->icode;
26249 const struct insn_data_d *insn_p = &insn_data[icode];
26250 enum machine_mode tmode = insn_p->operand[0].mode;
26251 enum machine_mode rmode = VOIDmode;
26252 bool swap = false;
26253 enum rtx_code comparison = d->comparison;
26254
26255 switch ((enum ix86_builtin_func_type) d->flag)
26256 {
26257 case V2DF_FTYPE_V2DF_ROUND:
26258 case V4DF_FTYPE_V4DF_ROUND:
26259 case V4SF_FTYPE_V4SF_ROUND:
26260 case V8SF_FTYPE_V8SF_ROUND:
26261 return ix86_expand_sse_round (d, exp, target);
26262 case INT_FTYPE_V8SF_V8SF_PTEST:
26263 case INT_FTYPE_V4DI_V4DI_PTEST:
26264 case INT_FTYPE_V4DF_V4DF_PTEST:
26265 case INT_FTYPE_V4SF_V4SF_PTEST:
26266 case INT_FTYPE_V2DI_V2DI_PTEST:
26267 case INT_FTYPE_V2DF_V2DF_PTEST:
26268 return ix86_expand_sse_ptest (d, exp, target);
26269 case FLOAT128_FTYPE_FLOAT128:
26270 case FLOAT_FTYPE_FLOAT:
26271 case INT_FTYPE_INT:
26272 case UINT64_FTYPE_INT:
26273 case UINT16_FTYPE_UINT16:
26274 case INT64_FTYPE_INT64:
26275 case INT64_FTYPE_V4SF:
26276 case INT64_FTYPE_V2DF:
26277 case INT_FTYPE_V16QI:
26278 case INT_FTYPE_V8QI:
26279 case INT_FTYPE_V8SF:
26280 case INT_FTYPE_V4DF:
26281 case INT_FTYPE_V4SF:
26282 case INT_FTYPE_V2DF:
26283 case V16QI_FTYPE_V16QI:
26284 case V8SI_FTYPE_V8SF:
26285 case V8SI_FTYPE_V4SI:
26286 case V8HI_FTYPE_V8HI:
26287 case V8HI_FTYPE_V16QI:
26288 case V8QI_FTYPE_V8QI:
26289 case V8SF_FTYPE_V8SF:
26290 case V8SF_FTYPE_V8SI:
26291 case V8SF_FTYPE_V4SF:
26292 case V8SF_FTYPE_V8HI:
26293 case V4SI_FTYPE_V4SI:
26294 case V4SI_FTYPE_V16QI:
26295 case V4SI_FTYPE_V4SF:
26296 case V4SI_FTYPE_V8SI:
26297 case V4SI_FTYPE_V8HI:
26298 case V4SI_FTYPE_V4DF:
26299 case V4SI_FTYPE_V2DF:
26300 case V4HI_FTYPE_V4HI:
26301 case V4DF_FTYPE_V4DF:
26302 case V4DF_FTYPE_V4SI:
26303 case V4DF_FTYPE_V4SF:
26304 case V4DF_FTYPE_V2DF:
26305 case V4SF_FTYPE_V4SF:
26306 case V4SF_FTYPE_V4SI:
26307 case V4SF_FTYPE_V8SF:
26308 case V4SF_FTYPE_V4DF:
26309 case V4SF_FTYPE_V8HI:
26310 case V4SF_FTYPE_V2DF:
26311 case V2DI_FTYPE_V2DI:
26312 case V2DI_FTYPE_V16QI:
26313 case V2DI_FTYPE_V8HI:
26314 case V2DI_FTYPE_V4SI:
26315 case V2DF_FTYPE_V2DF:
26316 case V2DF_FTYPE_V4SI:
26317 case V2DF_FTYPE_V4DF:
26318 case V2DF_FTYPE_V4SF:
26319 case V2DF_FTYPE_V2SI:
26320 case V2SI_FTYPE_V2SI:
26321 case V2SI_FTYPE_V4SF:
26322 case V2SI_FTYPE_V2SF:
26323 case V2SI_FTYPE_V2DF:
26324 case V2SF_FTYPE_V2SF:
26325 case V2SF_FTYPE_V2SI:
26326 nargs = 1;
26327 break;
26328 case V4SF_FTYPE_V4SF_VEC_MERGE:
26329 case V2DF_FTYPE_V2DF_VEC_MERGE:
26330 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26331 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26332 case V16QI_FTYPE_V16QI_V16QI:
26333 case V16QI_FTYPE_V8HI_V8HI:
26334 case V8QI_FTYPE_V8QI_V8QI:
26335 case V8QI_FTYPE_V4HI_V4HI:
26336 case V8HI_FTYPE_V8HI_V8HI:
26337 case V8HI_FTYPE_V16QI_V16QI:
26338 case V8HI_FTYPE_V4SI_V4SI:
26339 case V8SF_FTYPE_V8SF_V8SF:
26340 case V8SF_FTYPE_V8SF_V8SI:
26341 case V4SI_FTYPE_V4SI_V4SI:
26342 case V4SI_FTYPE_V8HI_V8HI:
26343 case V4SI_FTYPE_V4SF_V4SF:
26344 case V4SI_FTYPE_V2DF_V2DF:
26345 case V4HI_FTYPE_V4HI_V4HI:
26346 case V4HI_FTYPE_V8QI_V8QI:
26347 case V4HI_FTYPE_V2SI_V2SI:
26348 case V4DF_FTYPE_V4DF_V4DF:
26349 case V4DF_FTYPE_V4DF_V4DI:
26350 case V4SF_FTYPE_V4SF_V4SF:
26351 case V4SF_FTYPE_V4SF_V4SI:
26352 case V4SF_FTYPE_V4SF_V2SI:
26353 case V4SF_FTYPE_V4SF_V2DF:
26354 case V4SF_FTYPE_V4SF_DI:
26355 case V4SF_FTYPE_V4SF_SI:
26356 case V2DI_FTYPE_V2DI_V2DI:
26357 case V2DI_FTYPE_V16QI_V16QI:
26358 case V2DI_FTYPE_V4SI_V4SI:
26359 case V2DI_FTYPE_V2DI_V16QI:
26360 case V2DI_FTYPE_V2DF_V2DF:
26361 case V2SI_FTYPE_V2SI_V2SI:
26362 case V2SI_FTYPE_V4HI_V4HI:
26363 case V2SI_FTYPE_V2SF_V2SF:
26364 case V2DF_FTYPE_V2DF_V2DF:
26365 case V2DF_FTYPE_V2DF_V4SF:
26366 case V2DF_FTYPE_V2DF_V2DI:
26367 case V2DF_FTYPE_V2DF_DI:
26368 case V2DF_FTYPE_V2DF_SI:
26369 case V2SF_FTYPE_V2SF_V2SF:
26370 case V1DI_FTYPE_V1DI_V1DI:
26371 case V1DI_FTYPE_V8QI_V8QI:
26372 case V1DI_FTYPE_V2SI_V2SI:
26373 if (comparison == UNKNOWN)
26374 return ix86_expand_binop_builtin (icode, exp, target);
26375 nargs = 2;
26376 break;
26377 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26378 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26379 gcc_assert (comparison != UNKNOWN);
26380 nargs = 2;
26381 swap = true;
26382 break;
26383 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26384 case V8HI_FTYPE_V8HI_SI_COUNT:
26385 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26386 case V4SI_FTYPE_V4SI_SI_COUNT:
26387 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26388 case V4HI_FTYPE_V4HI_SI_COUNT:
26389 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26390 case V2DI_FTYPE_V2DI_SI_COUNT:
26391 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26392 case V2SI_FTYPE_V2SI_SI_COUNT:
26393 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26394 case V1DI_FTYPE_V1DI_SI_COUNT:
26395 nargs = 2;
26396 last_arg_count = true;
26397 break;
26398 case UINT64_FTYPE_UINT64_UINT64:
26399 case UINT_FTYPE_UINT_UINT:
26400 case UINT_FTYPE_UINT_USHORT:
26401 case UINT_FTYPE_UINT_UCHAR:
26402 case UINT16_FTYPE_UINT16_INT:
26403 case UINT8_FTYPE_UINT8_INT:
26404 nargs = 2;
26405 break;
26406 case V2DI_FTYPE_V2DI_INT_CONVERT:
26407 nargs = 2;
26408 rmode = V1TImode;
26409 nargs_constant = 1;
26410 break;
26411 case V8HI_FTYPE_V8HI_INT:
26412 case V8HI_FTYPE_V8SF_INT:
26413 case V8HI_FTYPE_V4SF_INT:
26414 case V8SF_FTYPE_V8SF_INT:
26415 case V4SI_FTYPE_V4SI_INT:
26416 case V4SI_FTYPE_V8SI_INT:
26417 case V4HI_FTYPE_V4HI_INT:
26418 case V4DF_FTYPE_V4DF_INT:
26419 case V4SF_FTYPE_V4SF_INT:
26420 case V4SF_FTYPE_V8SF_INT:
26421 case V2DI_FTYPE_V2DI_INT:
26422 case V2DF_FTYPE_V2DF_INT:
26423 case V2DF_FTYPE_V4DF_INT:
26424 nargs = 2;
26425 nargs_constant = 1;
26426 break;
26427 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26428 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26429 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26430 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26431 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26432 nargs = 3;
26433 break;
26434 case V16QI_FTYPE_V16QI_V16QI_INT:
26435 case V8HI_FTYPE_V8HI_V8HI_INT:
26436 case V8SI_FTYPE_V8SI_V8SI_INT:
26437 case V8SI_FTYPE_V8SI_V4SI_INT:
26438 case V8SF_FTYPE_V8SF_V8SF_INT:
26439 case V8SF_FTYPE_V8SF_V4SF_INT:
26440 case V4SI_FTYPE_V4SI_V4SI_INT:
26441 case V4DF_FTYPE_V4DF_V4DF_INT:
26442 case V4DF_FTYPE_V4DF_V2DF_INT:
26443 case V4SF_FTYPE_V4SF_V4SF_INT:
26444 case V2DI_FTYPE_V2DI_V2DI_INT:
26445 case V2DF_FTYPE_V2DF_V2DF_INT:
26446 nargs = 3;
26447 nargs_constant = 1;
26448 break;
26449 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26450 nargs = 3;
26451 rmode = V2DImode;
26452 nargs_constant = 1;
26453 break;
26454 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26455 nargs = 3;
26456 rmode = DImode;
26457 nargs_constant = 1;
26458 break;
26459 case V2DI_FTYPE_V2DI_UINT_UINT:
26460 nargs = 3;
26461 nargs_constant = 2;
26462 break;
26463 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26464 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26465 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26466 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26467 nargs = 4;
26468 nargs_constant = 1;
26469 break;
26470 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26471 nargs = 4;
26472 nargs_constant = 2;
26473 break;
26474 default:
26475 gcc_unreachable ();
26476 }
26477
26478 gcc_assert (nargs <= ARRAY_SIZE (args));
26479
26480 if (comparison != UNKNOWN)
26481 {
26482 gcc_assert (nargs == 2);
26483 return ix86_expand_sse_compare (d, exp, target, swap);
26484 }
26485
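/* When the insn computes its result in a mode (rmode) different from the
   builtin's return mode (tmode), generate the value in rmode and access it
   as tmode through a subreg; otherwise expand directly into the target.  */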
26486 if (rmode == VOIDmode || rmode == tmode)
26487 {
26488 if (optimize
26489 || target == 0
26490 || GET_MODE (target) != tmode
26491 || !insn_p->operand[0].predicate (target, tmode))
26492 target = gen_reg_rtx (tmode);
26493 real_target = target;
26494 }
26495 else
26496 {
26497 target = gen_reg_rtx (rmode);
26498 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26499 }
26500
26501 for (i = 0; i < nargs; i++)
26502 {
26503 tree arg = CALL_EXPR_ARG (exp, i);
26504 rtx op = expand_normal (arg);
26505 enum machine_mode mode = insn_p->operand[i + 1].mode;
26506 bool match = insn_p->operand[i + 1].predicate (op, mode);
26507
26508 if (last_arg_count && (i + 1) == nargs)
26509 {
26510 /* SIMD shift insns take either an 8-bit immediate or a
26511 register as the count, but the builtin functions take an int.
26512 If the count operand doesn't match, put it in a register.  */
26513 if (!match)
26514 {
26515 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26516 if (!insn_p->operand[i + 1].predicate (op, mode))
26517 op = copy_to_reg (op);
26518 }
26519 }
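/* The trailing nargs_constant operands must be compile-time constants;
   diagnose the immediate width each instruction requires.  */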
26520 else if ((nargs - i) <= nargs_constant)
26521 {
26522 if (!match)
26523 switch (icode)
26524 {
26525 case CODE_FOR_sse4_1_roundpd:
26526 case CODE_FOR_sse4_1_roundps:
26527 case CODE_FOR_sse4_1_roundsd:
26528 case CODE_FOR_sse4_1_roundss:
26529 case CODE_FOR_sse4_1_blendps:
26530 case CODE_FOR_avx_blendpd256:
26531 case CODE_FOR_avx_vpermilv4df:
26532 case CODE_FOR_avx_roundpd256:
26533 case CODE_FOR_avx_roundps256:
26534 error ("the last argument must be a 4-bit immediate");
26535 return const0_rtx;
26536
26537 case CODE_FOR_sse4_1_blendpd:
26538 case CODE_FOR_avx_vpermilv2df:
26539 case CODE_FOR_xop_vpermil2v2df3:
26540 case CODE_FOR_xop_vpermil2v4sf3:
26541 case CODE_FOR_xop_vpermil2v4df3:
26542 case CODE_FOR_xop_vpermil2v8sf3:
26543 error ("the last argument must be a 2-bit immediate");
26544 return const0_rtx;
26545
26546 case CODE_FOR_avx_vextractf128v4df:
26547 case CODE_FOR_avx_vextractf128v8sf:
26548 case CODE_FOR_avx_vextractf128v8si:
26549 case CODE_FOR_avx_vinsertf128v4df:
26550 case CODE_FOR_avx_vinsertf128v8sf:
26551 case CODE_FOR_avx_vinsertf128v8si:
26552 error ("the last argument must be a 1-bit immediate");
26553 return const0_rtx;
26554
26555 case CODE_FOR_avx_vmcmpv2df3:
26556 case CODE_FOR_avx_vmcmpv4sf3:
26557 case CODE_FOR_avx_cmpv2df3:
26558 case CODE_FOR_avx_cmpv4sf3:
26559 case CODE_FOR_avx_cmpv4df3:
26560 case CODE_FOR_avx_cmpv8sf3:
26561 error ("the last argument must be a 5-bit immediate");
26562 return const0_rtx;
26563
26564 default:
26565 switch (nargs_constant)
26566 {
26567 case 2:
26568 if ((nargs - i) == nargs_constant)
26569 {
26570 error ("the next to last argument must be an 8-bit immediate");
26571 break;
26572 }
26573 case 1:
26574 error ("the last argument must be an 8-bit immediate");
26575 break;
26576 default:
26577 gcc_unreachable ();
26578 }
26579 return const0_rtx;
26580 }
26581 }
26582 else
26583 {
26584 if (VECTOR_MODE_P (mode))
26585 op = safe_vector_operand (op, mode);
26586
26587 /* If we aren't optimizing, only allow one memory operand to
26588 be generated. */
26589 if (memory_operand (op, mode))
26590 num_memory++;
26591
26592 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
26593 {
26594 if (optimize || !match || num_memory > 1)
26595 op = copy_to_mode_reg (mode, op);
26596 }
26597 else
26598 {
26599 op = copy_to_reg (op);
26600 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
26601 }
26602 }
26603
26604 args[i].op = op;
26605 args[i].mode = mode;
26606 }
26607
26608 switch (nargs)
26609 {
26610 case 1:
26611 pat = GEN_FCN (icode) (real_target, args[0].op);
26612 break;
26613 case 2:
26614 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
26615 break;
26616 case 3:
26617 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26618 args[2].op);
26619 break;
26620 case 4:
26621 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26622 args[2].op, args[3].op);
26623 break;
26624 default:
26625 gcc_unreachable ();
26626 }
26627
26628 if (! pat)
26629 return 0;
26630
26631 emit_insn (pat);
26632 return target;
26633 }
26634
26635 /* Subroutine of ix86_expand_builtin to take care of special insns
26636 with variable number of operands. */
26637
26638 static rtx
26639 ix86_expand_special_args_builtin (const struct builtin_description *d,
26640 tree exp, rtx target)
26641 {
26642 tree arg;
26643 rtx pat, op;
26644 unsigned int i, nargs, arg_adjust, memory;
26645 struct
26646 {
26647 rtx op;
26648 enum machine_mode mode;
26649 } args[3];
26650 enum insn_code icode = d->icode;
26651 bool last_arg_constant = false;
26652 const struct insn_data_d *insn_p = &insn_data[icode];
26653 enum machine_mode tmode = insn_p->operand[0].mode;
26654 enum { load, store } klass;
26655
26656 switch ((enum ix86_builtin_func_type) d->flag)
26657 {
26658 case VOID_FTYPE_VOID:
26659 if (icode == CODE_FOR_avx_vzeroupper)
26660 target = GEN_INT (vzeroupper_intrinsic);
26661 emit_insn (GEN_FCN (icode) (target));
26662 return 0;
26663 case VOID_FTYPE_UINT64:
26664 case VOID_FTYPE_UNSIGNED:
26665 nargs = 0;
26666 klass = store;
26667 memory = 0;
26668 break;
26670 case UINT64_FTYPE_VOID:
26671 case UNSIGNED_FTYPE_VOID:
26672 nargs = 0;
26673 klass = load;
26674 memory = 0;
26675 break;
26676 case UINT64_FTYPE_PUNSIGNED:
26677 case V2DI_FTYPE_PV2DI:
26678 case V32QI_FTYPE_PCCHAR:
26679 case V16QI_FTYPE_PCCHAR:
26680 case V8SF_FTYPE_PCV4SF:
26681 case V8SF_FTYPE_PCFLOAT:
26682 case V4SF_FTYPE_PCFLOAT:
26683 case V4DF_FTYPE_PCV2DF:
26684 case V4DF_FTYPE_PCDOUBLE:
26685 case V2DF_FTYPE_PCDOUBLE:
26686 case VOID_FTYPE_PVOID:
26687 nargs = 1;
26688 klass = load;
26689 memory = 0;
26690 break;
26691 case VOID_FTYPE_PV2SF_V4SF:
26692 case VOID_FTYPE_PV4DI_V4DI:
26693 case VOID_FTYPE_PV2DI_V2DI:
26694 case VOID_FTYPE_PCHAR_V32QI:
26695 case VOID_FTYPE_PCHAR_V16QI:
26696 case VOID_FTYPE_PFLOAT_V8SF:
26697 case VOID_FTYPE_PFLOAT_V4SF:
26698 case VOID_FTYPE_PDOUBLE_V4DF:
26699 case VOID_FTYPE_PDOUBLE_V2DF:
26700 case VOID_FTYPE_PULONGLONG_ULONGLONG:
26701 case VOID_FTYPE_PINT_INT:
26702 nargs = 1;
26703 klass = store;
26704 /* Reserve memory operand for target. */
26705 memory = ARRAY_SIZE (args);
26706 break;
26707 case V4SF_FTYPE_V4SF_PCV2SF:
26708 case V2DF_FTYPE_V2DF_PCDOUBLE:
26709 nargs = 2;
26710 klass = load;
26711 memory = 1;
26712 break;
26713 case V8SF_FTYPE_PCV8SF_V8SI:
26714 case V4DF_FTYPE_PCV4DF_V4DI:
26715 case V4SF_FTYPE_PCV4SF_V4SI:
26716 case V2DF_FTYPE_PCV2DF_V2DI:
26717 nargs = 2;
26718 klass = load;
26719 memory = 0;
26720 break;
26721 case VOID_FTYPE_PV8SF_V8SI_V8SF:
26722 case VOID_FTYPE_PV4DF_V4DI_V4DF:
26723 case VOID_FTYPE_PV4SF_V4SI_V4SF:
26724 case VOID_FTYPE_PV2DF_V2DI_V2DF:
26725 nargs = 2;
26726 klass = store;
26727 /* Reserve memory operand for target. */
26728 memory = ARRAY_SIZE (args);
26729 break;
26730 case VOID_FTYPE_UINT_UINT_UINT:
26731 case VOID_FTYPE_UINT64_UINT_UINT:
26732 case UCHAR_FTYPE_UINT_UINT_UINT:
26733 case UCHAR_FTYPE_UINT64_UINT_UINT:
26734 nargs = 3;
26735 klass = load;
26736 memory = ARRAY_SIZE (args);
26737 last_arg_constant = true;
26738 break;
26739 default:
26740 gcc_unreachable ();
26741 }
26742
26743 gcc_assert (nargs <= ARRAY_SIZE (args));
26744
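/* For store-class builtins the first call argument is the destination:
   turn it into the insn's output operand (a MEM for memory destinations,
   otherwise a register) and shift the remaining arguments by arg_adjust.  */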
26745 if (klass == store)
26746 {
26747 arg = CALL_EXPR_ARG (exp, 0);
26748 op = expand_normal (arg);
26749 gcc_assert (target == 0);
26750 if (memory)
26751 {
26752 if (GET_MODE (op) != Pmode)
26753 op = convert_to_mode (Pmode, op, 1);
26754 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
26755 }
26756 else
26757 target = force_reg (tmode, op);
26758 arg_adjust = 1;
26759 }
26760 else
26761 {
26762 arg_adjust = 0;
26763 if (optimize
26764 || target == 0
26765 || GET_MODE (target) != tmode
26766 || !insn_p->operand[0].predicate (target, tmode))
26767 target = gen_reg_rtx (tmode);
26768 }
26769
26770 for (i = 0; i < nargs; i++)
26771 {
26772 enum machine_mode mode = insn_p->operand[i + 1].mode;
26773 bool match;
26774
26775 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
26776 op = expand_normal (arg);
26777 match = insn_p->operand[i + 1].predicate (op, mode);
26778
26779 if (last_arg_constant && (i + 1) == nargs)
26780 {
26781 if (!match)
26782 {
26783 if (icode == CODE_FOR_lwp_lwpvalsi3
26784 || icode == CODE_FOR_lwp_lwpinssi3
26785 || icode == CODE_FOR_lwp_lwpvaldi3
26786 || icode == CODE_FOR_lwp_lwpinsdi3)
26787 error ("the last argument must be a 32-bit immediate");
26788 else
26789 error ("the last argument must be an 8-bit immediate");
26790 return const0_rtx;
26791 }
26792 }
26793 else
26794 {
26795 if (i == memory)
26796 {
26797 /* This must be the memory operand. */
26798 if (GET_MODE (op) != Pmode)
26799 op = convert_to_mode (Pmode, op, 1);
26800 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
26801 gcc_assert (GET_MODE (op) == mode
26802 || GET_MODE (op) == VOIDmode);
26803 }
26804 else
26805 {
26806 /* This must be a register.  */
26807 if (VECTOR_MODE_P (mode))
26808 op = safe_vector_operand (op, mode);
26809
26810 gcc_assert (GET_MODE (op) == mode
26811 || GET_MODE (op) == VOIDmode);
26812 op = copy_to_mode_reg (mode, op);
26813 }
26814 }
26815
26816 args[i].op = op;
26817 args[i].mode = mode;
26818 }
26819
26820 switch (nargs)
26821 {
26822 case 0:
26823 pat = GEN_FCN (icode) (target);
26824 break;
26825 case 1:
26826 pat = GEN_FCN (icode) (target, args[0].op);
26827 break;
26828 case 2:
26829 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26830 break;
26831 case 3:
26832 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26833 break;
26834 default:
26835 gcc_unreachable ();
26836 }
26837
26838 if (! pat)
26839 return 0;
26840 emit_insn (pat);
26841 return klass == store ? 0 : target;
26842 }
26843
26844 /* Return the integer constant in ARG. Constrain it to be in the range
26845 of the subparts of VEC_TYPE; issue an error if not. */
26846
26847 static int
26848 get_element_number (tree vec_type, tree arg)
26849 {
26850 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
26851
26852 if (!host_integerp (arg, 1)
26853 || (elt = tree_low_cst (arg, 1), elt > max))
26854 {
26855 error ("selector must be an integer constant in the range 0..%wi", max);
26856 return 0;
26857 }
26858
26859 return elt;
26860 }
26861
26862 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26863 ix86_expand_vector_init. We DO have language-level syntax for this, in
26864 the form of (type){ init-list }. Except that since we can't place emms
26865 instructions from inside the compiler, we can't allow the use of MMX
26866 registers unless the user explicitly asks for it. So we do *not* define
26867 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
26868 we have builtins invoked by mmintrin.h that give us license to emit
26869 these sorts of instructions. */
26870
26871 static rtx
26872 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
26873 {
26874 enum machine_mode tmode = TYPE_MODE (type);
26875 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
26876 int i, n_elt = GET_MODE_NUNITS (tmode);
26877 rtvec v = rtvec_alloc (n_elt);
26878
26879 gcc_assert (VECTOR_MODE_P (tmode));
26880 gcc_assert (call_expr_nargs (exp) == n_elt);
26881
26882 for (i = 0; i < n_elt; ++i)
26883 {
26884 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
26885 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
26886 }
26887
26888 if (!target || !register_operand (target, tmode))
26889 target = gen_reg_rtx (tmode);
26890
26891 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
26892 return target;
26893 }
26894
26895 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26896 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
26897 had a language-level syntax for referencing vector elements. */
26898
26899 static rtx
26900 ix86_expand_vec_ext_builtin (tree exp, rtx target)
26901 {
26902 enum machine_mode tmode, mode0;
26903 tree arg0, arg1;
26904 int elt;
26905 rtx op0;
26906
26907 arg0 = CALL_EXPR_ARG (exp, 0);
26908 arg1 = CALL_EXPR_ARG (exp, 1);
26909
26910 op0 = expand_normal (arg0);
26911 elt = get_element_number (TREE_TYPE (arg0), arg1);
26912
26913 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26914 mode0 = TYPE_MODE (TREE_TYPE (arg0));
26915 gcc_assert (VECTOR_MODE_P (mode0));
26916
26917 op0 = force_reg (mode0, op0);
26918
26919 if (optimize || !target || !register_operand (target, tmode))
26920 target = gen_reg_rtx (tmode);
26921
26922 ix86_expand_vector_extract (true, target, op0, elt);
26923
26924 return target;
26925 }
26926
26927 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26928 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
26929 a language-level syntax for referencing vector elements. */
26930
26931 static rtx
26932 ix86_expand_vec_set_builtin (tree exp)
26933 {
26934 enum machine_mode tmode, mode1;
26935 tree arg0, arg1, arg2;
26936 int elt;
26937 rtx op0, op1, target;
26938
26939 arg0 = CALL_EXPR_ARG (exp, 0);
26940 arg1 = CALL_EXPR_ARG (exp, 1);
26941 arg2 = CALL_EXPR_ARG (exp, 2);
26942
26943 tmode = TYPE_MODE (TREE_TYPE (arg0));
26944 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26945 gcc_assert (VECTOR_MODE_P (tmode));
26946
26947 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
26948 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
26949 elt = get_element_number (TREE_TYPE (arg0), arg2);
26950
26951 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
26952 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
26953
26954 op0 = force_reg (tmode, op0);
26955 op1 = force_reg (mode1, op1);
26956
26957 /* OP0 is the source of these builtin functions and shouldn't be
26958 modified. Create a copy, use it and return it as target. */
26959 target = gen_reg_rtx (tmode);
26960 emit_move_insn (target, op0);
26961 ix86_expand_vector_set (true, target, op1, elt);
26962
26963 return target;
26964 }
26965
26966 /* Expand an expression EXP that calls a built-in function,
26967 with result going to TARGET if that's convenient
26968 (and in mode MODE if that's convenient).
26969 SUBTARGET may be used as the target for computing one of EXP's operands.
26970 IGNORE is nonzero if the value is to be ignored. */
26971
26972 static rtx
26973 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
26974 enum machine_mode mode ATTRIBUTE_UNUSED,
26975 int ignore ATTRIBUTE_UNUSED)
26976 {
26977 const struct builtin_description *d;
26978 size_t i;
26979 enum insn_code icode;
26980 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
26981 tree arg0, arg1, arg2;
26982 rtx op0, op1, op2, pat;
26983 enum machine_mode mode0, mode1, mode2;
26984 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
26985
26986 /* Determine whether the builtin function is available under the current ISA.
26987 Originally the builtin was not created if it wasn't applicable to the
26988 current ISA based on the command line switches. With function specific
26989 options, we need to check in the context of the function making the call
26990 whether it is supported. */
26991 if (ix86_builtins_isa[fcode].isa
26992 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
26993 {
26994 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
26995 NULL, (enum fpmath_unit) 0, false);
26996
26997 if (!opts)
26998 error ("%qE needs unknown isa option", fndecl);
26999 else
27000 {
27001 gcc_assert (opts != NULL);
27002 error ("%qE needs isa option %s", fndecl, opts);
27003 free (opts);
27004 }
27005 return const0_rtx;
27006 }
27007
27008 switch (fcode)
27009 {
27010 case IX86_BUILTIN_MASKMOVQ:
27011 case IX86_BUILTIN_MASKMOVDQU:
27012 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27013 ? CODE_FOR_mmx_maskmovq
27014 : CODE_FOR_sse2_maskmovdqu);
27015 /* Note the arg order is different from the operand order. */
27016 arg1 = CALL_EXPR_ARG (exp, 0);
27017 arg2 = CALL_EXPR_ARG (exp, 1);
27018 arg0 = CALL_EXPR_ARG (exp, 2);
27019 op0 = expand_normal (arg0);
27020 op1 = expand_normal (arg1);
27021 op2 = expand_normal (arg2);
27022 mode0 = insn_data[icode].operand[0].mode;
27023 mode1 = insn_data[icode].operand[1].mode;
27024 mode2 = insn_data[icode].operand[2].mode;
27025
27026 if (GET_MODE (op0) != Pmode)
27027 op0 = convert_to_mode (Pmode, op0, 1);
27028 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
27029
27030 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27031 op0 = copy_to_mode_reg (mode0, op0);
27032 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27033 op1 = copy_to_mode_reg (mode1, op1);
27034 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27035 op2 = copy_to_mode_reg (mode2, op2);
27036 pat = GEN_FCN (icode) (op0, op1, op2);
27037 if (! pat)
27038 return 0;
27039 emit_insn (pat);
27040 return 0;
27041
27042 case IX86_BUILTIN_LDMXCSR:
27043 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27044 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27045 emit_move_insn (target, op0);
27046 emit_insn (gen_sse_ldmxcsr (target));
27047 return 0;
27048
27049 case IX86_BUILTIN_STMXCSR:
27050 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27051 emit_insn (gen_sse_stmxcsr (target));
27052 return copy_to_mode_reg (SImode, target);
27053
27054 case IX86_BUILTIN_CLFLUSH:
27055 arg0 = CALL_EXPR_ARG (exp, 0);
27056 op0 = expand_normal (arg0);
27057 icode = CODE_FOR_sse2_clflush;
27058 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27059 {
27060 if (GET_MODE (op0) != Pmode)
27061 op0 = convert_to_mode (Pmode, op0, 1);
27062 op0 = force_reg (Pmode, op0);
27063 }
27064
27065 emit_insn (gen_sse2_clflush (op0));
27066 return 0;
27067
27068 case IX86_BUILTIN_MONITOR:
27069 arg0 = CALL_EXPR_ARG (exp, 0);
27070 arg1 = CALL_EXPR_ARG (exp, 1);
27071 arg2 = CALL_EXPR_ARG (exp, 2);
27072 op0 = expand_normal (arg0);
27073 op1 = expand_normal (arg1);
27074 op2 = expand_normal (arg2);
27075 if (!REG_P (op0))
27076 {
27077 if (GET_MODE (op0) != Pmode)
27078 op0 = convert_to_mode (Pmode, op0, 1);
27079 op0 = force_reg (Pmode, op0);
27080 }
27081 if (!REG_P (op1))
27082 op1 = copy_to_mode_reg (SImode, op1);
27083 if (!REG_P (op2))
27084 op2 = copy_to_mode_reg (SImode, op2);
27085 emit_insn (ix86_gen_monitor (op0, op1, op2));
27086 return 0;
27087
27088 case IX86_BUILTIN_MWAIT:
27089 arg0 = CALL_EXPR_ARG (exp, 0);
27090 arg1 = CALL_EXPR_ARG (exp, 1);
27091 op0 = expand_normal (arg0);
27092 op1 = expand_normal (arg1);
27093 if (!REG_P (op0))
27094 op0 = copy_to_mode_reg (SImode, op0);
27095 if (!REG_P (op1))
27096 op1 = copy_to_mode_reg (SImode, op1);
27097 emit_insn (gen_sse3_mwait (op0, op1));
27098 return 0;
27099
27100 case IX86_BUILTIN_VEC_INIT_V2SI:
27101 case IX86_BUILTIN_VEC_INIT_V4HI:
27102 case IX86_BUILTIN_VEC_INIT_V8QI:
27103 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27104
27105 case IX86_BUILTIN_VEC_EXT_V2DF:
27106 case IX86_BUILTIN_VEC_EXT_V2DI:
27107 case IX86_BUILTIN_VEC_EXT_V4SF:
27108 case IX86_BUILTIN_VEC_EXT_V4SI:
27109 case IX86_BUILTIN_VEC_EXT_V8HI:
27110 case IX86_BUILTIN_VEC_EXT_V2SI:
27111 case IX86_BUILTIN_VEC_EXT_V4HI:
27112 case IX86_BUILTIN_VEC_EXT_V16QI:
27113 return ix86_expand_vec_ext_builtin (exp, target);
27114
27115 case IX86_BUILTIN_VEC_SET_V2DI:
27116 case IX86_BUILTIN_VEC_SET_V4SF:
27117 case IX86_BUILTIN_VEC_SET_V4SI:
27118 case IX86_BUILTIN_VEC_SET_V8HI:
27119 case IX86_BUILTIN_VEC_SET_V4HI:
27120 case IX86_BUILTIN_VEC_SET_V16QI:
27121 return ix86_expand_vec_set_builtin (exp);
27122
27123 case IX86_BUILTIN_VEC_PERM_V2DF:
27124 case IX86_BUILTIN_VEC_PERM_V4SF:
27125 case IX86_BUILTIN_VEC_PERM_V2DI:
27126 case IX86_BUILTIN_VEC_PERM_V4SI:
27127 case IX86_BUILTIN_VEC_PERM_V8HI:
27128 case IX86_BUILTIN_VEC_PERM_V16QI:
27129 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27130 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27131 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27132 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27133 case IX86_BUILTIN_VEC_PERM_V4DF:
27134 case IX86_BUILTIN_VEC_PERM_V8SF:
27135 return ix86_expand_vec_perm_builtin (exp);
27136
27137 case IX86_BUILTIN_INFQ:
27138 case IX86_BUILTIN_HUGE_VALQ:
27139 {
27140 REAL_VALUE_TYPE inf;
27141 rtx tmp;
27142
27143 real_inf (&inf);
27144 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27145
27146 tmp = validize_mem (force_const_mem (mode, tmp));
27147
27148 if (target == 0)
27149 target = gen_reg_rtx (mode);
27150
27151 emit_move_insn (target, tmp);
27152 return target;
27153 }
27154
27155 case IX86_BUILTIN_LLWPCB:
27156 arg0 = CALL_EXPR_ARG (exp, 0);
27157 op0 = expand_normal (arg0);
27158 icode = CODE_FOR_lwp_llwpcb;
27159 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27160 {
27161 if (GET_MODE (op0) != Pmode)
27162 op0 = convert_to_mode (Pmode, op0, 1);
27163 op0 = force_reg (Pmode, op0);
27164 }
27165 emit_insn (gen_lwp_llwpcb (op0));
27166 return 0;
27167
27168 case IX86_BUILTIN_SLWPCB:
27169 icode = CODE_FOR_lwp_slwpcb;
27170 if (!target
27171 || !insn_data[icode].operand[0].predicate (target, Pmode))
27172 target = gen_reg_rtx (Pmode);
27173 emit_insn (gen_lwp_slwpcb (target));
27174 return target;
27175
27176 case IX86_BUILTIN_BEXTRI32:
27177 case IX86_BUILTIN_BEXTRI64:
27178 arg0 = CALL_EXPR_ARG (exp, 0);
27179 arg1 = CALL_EXPR_ARG (exp, 1);
27180 op0 = expand_normal (arg0);
27181 op1 = expand_normal (arg1);
27182 icode = (fcode == IX86_BUILTIN_BEXTRI32
27183 ? CODE_FOR_tbm_bextri_si
27184 : CODE_FOR_tbm_bextri_di);
27185 if (!CONST_INT_P (op1))
27186 {
27187 error ("last argument must be an immediate");
27188 return const0_rtx;
27189 }
27190 else
27191 {
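/* The control immediate packs the bit-field length in bits [15:8] and the
   starting bit position in bits [7:0]; split it into the two separate
   operands the insn pattern expects.  */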
27192 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27193 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27194 op1 = GEN_INT (length);
27195 op2 = GEN_INT (lsb_index);
27196 pat = GEN_FCN (icode) (target, op0, op1, op2);
27197 if (pat)
27198 emit_insn (pat);
27199 return target;
27200 }
27201
27202 case IX86_BUILTIN_RDRAND16_STEP:
27203 icode = CODE_FOR_rdrandhi_1;
27204 mode0 = HImode;
27205 goto rdrand_step;
27206
27207 case IX86_BUILTIN_RDRAND32_STEP:
27208 icode = CODE_FOR_rdrandsi_1;
27209 mode0 = SImode;
27210 goto rdrand_step;
27211
27212 case IX86_BUILTIN_RDRAND64_STEP:
27213 icode = CODE_FOR_rdranddi_1;
27214 mode0 = DImode;
27215
27216 rdrand_step:
27217 op0 = gen_reg_rtx (mode0);
27218 emit_insn (GEN_FCN (icode) (op0));
27219
27220 arg0 = CALL_EXPR_ARG (exp, 0);
27221 op1 = expand_normal (arg0);
27222 if (!address_operand (op1, VOIDmode))
27223 {
27224 op1 = convert_memory_address (Pmode, op1);
27225 op1 = copy_addr_to_reg (op1);
27226 }
27227 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
27228
27229 op1 = gen_reg_rtx (SImode);
27230 emit_move_insn (op1, CONST1_RTX (SImode));
27231
27232 /* Emit SImode conditional move. */
27233 if (mode0 == HImode)
27234 {
27235 op2 = gen_reg_rtx (SImode);
27236 emit_insn (gen_zero_extendhisi2 (op2, op0));
27237 }
27238 else if (mode0 == SImode)
27239 op2 = op0;
27240 else
27241 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27242
27243 if (target == 0)
27244 target = gen_reg_rtx (SImode);
27245
27246 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27247 const0_rtx);
27248 emit_insn (gen_rtx_SET (VOIDmode, target,
27249 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
27250 return target;
27251
27252 default:
27253 break;
27254 }
27255
27256 for (i = 0, d = bdesc_special_args;
27257 i < ARRAY_SIZE (bdesc_special_args);
27258 i++, d++)
27259 if (d->code == fcode)
27260 return ix86_expand_special_args_builtin (d, exp, target);
27261
27262 for (i = 0, d = bdesc_args;
27263 i < ARRAY_SIZE (bdesc_args);
27264 i++, d++)
27265 if (d->code == fcode)
27266 switch (fcode)
27267 {
27268 case IX86_BUILTIN_FABSQ:
27269 case IX86_BUILTIN_COPYSIGNQ:
27270 if (!TARGET_SSE2)
27271 /* Emit a normal call if SSE2 isn't available. */
27272 return expand_call (exp, target, ignore);
27273 default:
27274 return ix86_expand_args_builtin (d, exp, target);
27275 }
27276
27277 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27278 if (d->code == fcode)
27279 return ix86_expand_sse_comi (d, exp, target);
27280
27281 for (i = 0, d = bdesc_pcmpestr;
27282 i < ARRAY_SIZE (bdesc_pcmpestr);
27283 i++, d++)
27284 if (d->code == fcode)
27285 return ix86_expand_sse_pcmpestr (d, exp, target);
27286
27287 for (i = 0, d = bdesc_pcmpistr;
27288 i < ARRAY_SIZE (bdesc_pcmpistr);
27289 i++, d++)
27290 if (d->code == fcode)
27291 return ix86_expand_sse_pcmpistr (d, exp, target);
27292
27293 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27294 if (d->code == fcode)
27295 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27296 (enum ix86_builtin_func_type)
27297 d->flag, d->comparison);
27298
27299 gcc_unreachable ();
27300 }
27301
27302 /* Returns a function decl for a vectorized version of the builtin function
27303 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27304 if it is not available. */
27305
27306 static tree
27307 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27308 tree type_in)
27309 {
27310 enum machine_mode in_mode, out_mode;
27311 int in_n, out_n;
27312 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27313
27314 if (TREE_CODE (type_out) != VECTOR_TYPE
27315 || TREE_CODE (type_in) != VECTOR_TYPE
27316 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27317 return NULL_TREE;
27318
27319 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27320 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27321 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27322 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27323
27324 switch (fn)
27325 {
27326 case BUILT_IN_SQRT:
27327 if (out_mode == DFmode && in_mode == DFmode)
27328 {
27329 if (out_n == 2 && in_n == 2)
27330 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27331 else if (out_n == 4 && in_n == 4)
27332 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27333 }
27334 break;
27335
27336 case BUILT_IN_SQRTF:
27337 if (out_mode == SFmode && in_mode == SFmode)
27338 {
27339 if (out_n == 4 && in_n == 4)
27340 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27341 else if (out_n == 8 && in_n == 8)
27342 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27343 }
27344 break;
27345
27346 case BUILT_IN_LRINT:
27347 if (out_mode == SImode && out_n == 4
27348 && in_mode == DFmode && in_n == 2)
27349 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27350 break;
27351
27352 case BUILT_IN_LRINTF:
27353 if (out_mode == SImode && in_mode == SFmode)
27354 {
27355 if (out_n == 4 && in_n == 4)
27356 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27357 else if (out_n == 8 && in_n == 8)
27358 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27359 }
27360 break;
27361
27362 case BUILT_IN_COPYSIGN:
27363 if (out_mode == DFmode && in_mode == DFmode)
27364 {
27365 if (out_n == 2 && in_n == 2)
27366 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27367 else if (out_n == 4 && in_n == 4)
27368 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27369 }
27370 break;
27371
27372 case BUILT_IN_COPYSIGNF:
27373 if (out_mode == SFmode && in_mode == SFmode)
27374 {
27375 if (out_n == 4 && in_n == 4)
27376 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27377 else if (out_n == 8 && in_n == 8)
27378 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27379 }
27380 break;
27381
27382 case BUILT_IN_FLOOR:
27383 /* The round insn does not trap on denormals. */
27384 if (flag_trapping_math || !TARGET_ROUND)
27385 break;
27386
27387 if (out_mode == DFmode && in_mode == DFmode)
27388 {
27389 if (out_n == 2 && in_n == 2)
27390 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27391 else if (out_n == 4 && in_n == 4)
27392 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27393 }
27394 break;
27395
27396 case BUILT_IN_FLOORF:
27397 /* The round insn does not trap on denormals. */
27398 if (flag_trapping_math || !TARGET_ROUND)
27399 break;
27400
27401 if (out_mode == SFmode && in_mode == SFmode)
27402 {
27403 if (out_n == 4 && in_n == 4)
27404 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27405 else if (out_n == 8 && in_n == 8)
27406 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27407 }
27408 break;
27409
27410 case BUILT_IN_CEIL:
27411 /* The round insn does not trap on denormals. */
27412 if (flag_trapping_math || !TARGET_ROUND)
27413 break;
27414
27415 if (out_mode == DFmode && in_mode == DFmode)
27416 {
27417 if (out_n == 2 && in_n == 2)
27418 return ix86_builtins[IX86_BUILTIN_CEILPD];
27419 else if (out_n == 4 && in_n == 4)
27420 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27421 }
27422 break;
27423
27424 case BUILT_IN_CEILF:
27425 /* The round insn does not trap on denormals. */
27426 if (flag_trapping_math || !TARGET_ROUND)
27427 break;
27428
27429 if (out_mode == SFmode && in_mode == SFmode)
27430 {
27431 if (out_n == 4 && in_n == 4)
27432 return ix86_builtins[IX86_BUILTIN_CEILPS];
27433 else if (out_n == 8 && in_n == 8)
27434 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27435 }
27436 break;
27437
27438 case BUILT_IN_TRUNC:
27439 /* The round insn does not trap on denormals. */
27440 if (flag_trapping_math || !TARGET_ROUND)
27441 break;
27442
27443 if (out_mode == DFmode && in_mode == DFmode)
27444 {
27445 if (out_n == 2 && in_n == 2)
27446 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27447 else if (out_n == 4 && in_n == 4)
27448 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27449 }
27450 break;
27451
27452 case BUILT_IN_TRUNCF:
27453 /* The round insn does not trap on denormals. */
27454 if (flag_trapping_math || !TARGET_ROUND)
27455 break;
27456
27457 if (out_mode == SFmode && in_mode == SFmode)
27458 {
27459 if (out_n == 4 && in_n == 4)
27460 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27461 else if (out_n == 8 && in_n == 8)
27462 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27463 }
27464 break;
27465
27466 case BUILT_IN_RINT:
27467 /* The round insn does not trap on denormals. */
27468 if (flag_trapping_math || !TARGET_ROUND)
27469 break;
27470
27471 if (out_mode == DFmode && in_mode == DFmode)
27472 {
27473 if (out_n == 2 && in_n == 2)
27474 return ix86_builtins[IX86_BUILTIN_RINTPD];
27475 else if (out_n == 4 && in_n == 4)
27476 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27477 }
27478 break;
27479
27480 case BUILT_IN_RINTF:
27481 /* The round insn does not trap on denormals. */
27482 if (flag_trapping_math || !TARGET_ROUND)
27483 break;
27484
27485 if (out_mode == SFmode && in_mode == SFmode)
27486 {
27487 if (out_n == 4 && in_n == 4)
27488 return ix86_builtins[IX86_BUILTIN_RINTPS];
27489 else if (out_n == 8 && in_n == 8)
27490 return ix86_builtins[IX86_BUILTIN_RINTPS256];
27491 }
27492 break;
27493
27494 case BUILT_IN_FMA:
27495 if (out_mode == DFmode && in_mode == DFmode)
27496 {
27497 if (out_n == 2 && in_n == 2)
27498 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27499 if (out_n == 4 && in_n == 4)
27500 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27501 }
27502 break;
27503
27504 case BUILT_IN_FMAF:
27505 if (out_mode == SFmode && in_mode == SFmode)
27506 {
27507 if (out_n == 4 && in_n == 4)
27508 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27509 if (out_n == 8 && in_n == 8)
27510 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27511 }
27512 break;
27513
27514 default:
27515 break;
27516 }
27517
27518 /* Dispatch to a handler for a vectorization library. */
27519 if (ix86_veclib_handler)
27520 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27521 type_in);
27522
27523 return NULL_TREE;
27524 }
27525
27526 /* Handler for an SVML-style interface to
27527 a library with vectorized intrinsics. */
27528
27529 static tree
27530 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27531 {
27532 char name[20];
27533 tree fntype, new_fndecl, args;
27534 unsigned arity;
27535 const char *bname;
27536 enum machine_mode el_mode, in_mode;
27537 int n, in_n;
27538
27539 /* The SVML is suitable for unsafe math only. */
27540 if (!flag_unsafe_math_optimizations)
27541 return NULL_TREE;
27542
27543 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27544 n = TYPE_VECTOR_SUBPARTS (type_out);
27545 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27546 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27547 if (el_mode != in_mode
27548 || n != in_n)
27549 return NULL_TREE;
27550
27551 switch (fn)
27552 {
27553 case BUILT_IN_EXP:
27554 case BUILT_IN_LOG:
27555 case BUILT_IN_LOG10:
27556 case BUILT_IN_POW:
27557 case BUILT_IN_TANH:
27558 case BUILT_IN_TAN:
27559 case BUILT_IN_ATAN:
27560 case BUILT_IN_ATAN2:
27561 case BUILT_IN_ATANH:
27562 case BUILT_IN_CBRT:
27563 case BUILT_IN_SINH:
27564 case BUILT_IN_SIN:
27565 case BUILT_IN_ASINH:
27566 case BUILT_IN_ASIN:
27567 case BUILT_IN_COSH:
27568 case BUILT_IN_COS:
27569 case BUILT_IN_ACOSH:
27570 case BUILT_IN_ACOS:
27571 if (el_mode != DFmode || n != 2)
27572 return NULL_TREE;
27573 break;
27574
27575 case BUILT_IN_EXPF:
27576 case BUILT_IN_LOGF:
27577 case BUILT_IN_LOG10F:
27578 case BUILT_IN_POWF:
27579 case BUILT_IN_TANHF:
27580 case BUILT_IN_TANF:
27581 case BUILT_IN_ATANF:
27582 case BUILT_IN_ATAN2F:
27583 case BUILT_IN_ATANHF:
27584 case BUILT_IN_CBRTF:
27585 case BUILT_IN_SINHF:
27586 case BUILT_IN_SINF:
27587 case BUILT_IN_ASINHF:
27588 case BUILT_IN_ASINF:
27589 case BUILT_IN_COSHF:
27590 case BUILT_IN_COSF:
27591 case BUILT_IN_ACOSHF:
27592 case BUILT_IN_ACOSF:
27593 if (el_mode != SFmode || n != 4)
27594 return NULL_TREE;
27595 break;
27596
27597 default:
27598 return NULL_TREE;
27599 }
27600
27601 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27602
27603 if (fn == BUILT_IN_LOGF)
27604 strcpy (name, "vmlsLn4");
27605 else if (fn == BUILT_IN_LOG)
27606 strcpy (name, "vmldLn2");
27607 else if (n == 4)
27608 {
27609 sprintf (name, "vmls%s", bname+10);
27610 name[strlen (name)-1] = '4';
27611 }
27612 else
27613 sprintf (name, "vmld%s2", bname+10);
27614
27615 /* Convert to uppercase. */
27616 name[4] &= ~0x20;
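/* For example, BUILT_IN_SINF yields "vmlsSin4" and BUILT_IN_SIN yields
   "vmldSin2".  */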
27617
27618 arity = 0;
27619 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27620 args = TREE_CHAIN (args))
27621 arity++;
27622
27623 if (arity == 1)
27624 fntype = build_function_type_list (type_out, type_in, NULL);
27625 else
27626 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27627
27628 /* Build a function declaration for the vectorized function. */
27629 new_fndecl = build_decl (BUILTINS_LOCATION,
27630 FUNCTION_DECL, get_identifier (name), fntype);
27631 TREE_PUBLIC (new_fndecl) = 1;
27632 DECL_EXTERNAL (new_fndecl) = 1;
27633 DECL_IS_NOVOPS (new_fndecl) = 1;
27634 TREE_READONLY (new_fndecl) = 1;
27635
27636 return new_fndecl;
27637 }
27638
27639 /* Handler for an ACML-style interface to
27640 a library with vectorized intrinsics. */
27641
27642 static tree
27643 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27644 {
27645 char name[20] = "__vr.._";
27646 tree fntype, new_fndecl, args;
27647 unsigned arity;
27648 const char *bname;
27649 enum machine_mode el_mode, in_mode;
27650 int n, in_n;
27651
27652 /* The ACML is 64-bit only and suitable for unsafe math only, as
27653 it does not correctly support parts of IEEE with the required
27654 precision such as denormals.  */
27655 if (!TARGET_64BIT
27656 || !flag_unsafe_math_optimizations)
27657 return NULL_TREE;
27658
27659 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27660 n = TYPE_VECTOR_SUBPARTS (type_out);
27661 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27662 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27663 if (el_mode != in_mode
27664 || n != in_n)
27665 return NULL_TREE;
27666
27667 switch (fn)
27668 {
27669 case BUILT_IN_SIN:
27670 case BUILT_IN_COS:
27671 case BUILT_IN_EXP:
27672 case BUILT_IN_LOG:
27673 case BUILT_IN_LOG2:
27674 case BUILT_IN_LOG10:
27675 name[4] = 'd';
27676 name[5] = '2';
27677 if (el_mode != DFmode
27678 || n != 2)
27679 return NULL_TREE;
27680 break;
27681
27682 case BUILT_IN_SINF:
27683 case BUILT_IN_COSF:
27684 case BUILT_IN_EXPF:
27685 case BUILT_IN_POWF:
27686 case BUILT_IN_LOGF:
27687 case BUILT_IN_LOG2F:
27688 case BUILT_IN_LOG10F:
27689 name[4] = 's';
27690 name[5] = '4';
27691 if (el_mode != SFmode
27692 || n != 4)
27693 return NULL_TREE;
27694 break;
27695
27696 default:
27697 return NULL_TREE;
27698 }
27699
27700 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27701 sprintf (name + 7, "%s", bname+10);
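/* For example, BUILT_IN_SIN yields "__vrd2_sin" and BUILT_IN_SINF yields
   "__vrs4_sinf".  */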
27702
27703 arity = 0;
27704 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27705 args = TREE_CHAIN (args))
27706 arity++;
27707
27708 if (arity == 1)
27709 fntype = build_function_type_list (type_out, type_in, NULL);
27710 else
27711 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27712
27713 /* Build a function declaration for the vectorized function. */
27714 new_fndecl = build_decl (BUILTINS_LOCATION,
27715 FUNCTION_DECL, get_identifier (name), fntype);
27716 TREE_PUBLIC (new_fndecl) = 1;
27717 DECL_EXTERNAL (new_fndecl) = 1;
27718 DECL_IS_NOVOPS (new_fndecl) = 1;
27719 TREE_READONLY (new_fndecl) = 1;
27720
27721 return new_fndecl;
27722 }
27723
27724
27725 /* Returns a decl of a function that implements conversion of an integer vector
27726 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27727 are the types involved when converting according to CODE.
27728 Return NULL_TREE if it is not available. */
27729
27730 static tree
27731 ix86_vectorize_builtin_conversion (unsigned int code,
27732 tree dest_type, tree src_type)
27733 {
27734 if (! TARGET_SSE2)
27735 return NULL_TREE;
27736
27737 switch (code)
27738 {
27739 case FLOAT_EXPR:
27740 switch (TYPE_MODE (src_type))
27741 {
27742 case V4SImode:
27743 switch (TYPE_MODE (dest_type))
27744 {
27745 case V4SFmode:
27746 return (TYPE_UNSIGNED (src_type)
27747 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
27748 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
27749 case V4DFmode:
27750 return (TYPE_UNSIGNED (src_type)
27751 ? NULL_TREE
27752 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
27753 default:
27754 return NULL_TREE;
27755 }
27756 break;
27757 case V8SImode:
27758 switch (TYPE_MODE (dest_type))
27759 {
27760 case V8SFmode:
27761 return (TYPE_UNSIGNED (src_type)
27762 ? NULL_TREE
27763 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
27764 default:
27765 return NULL_TREE;
27766 }
27767 break;
27768 default:
27769 return NULL_TREE;
27770 }
27771
27772 case FIX_TRUNC_EXPR:
27773 switch (TYPE_MODE (dest_type))
27774 {
27775 case V4SImode:
27776 switch (TYPE_MODE (src_type))
27777 {
27778 case V4SFmode:
27779 return (TYPE_UNSIGNED (dest_type)
27780 ? NULL_TREE
27781 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
27782 case V4DFmode:
27783 return (TYPE_UNSIGNED (dest_type)
27784 ? NULL_TREE
27785 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
27786 default:
27787 return NULL_TREE;
27788 }
27789 break;
27790
27791 case V8SImode:
27792 switch (TYPE_MODE (src_type))
27793 {
27794 case V8SFmode:
27795 return (TYPE_UNSIGNED (dest_type)
27796 ? NULL_TREE
27797 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
27798 default:
27799 return NULL_TREE;
27800 }
27801 break;
27802
27803 default:
27804 return NULL_TREE;
27805 }
27806
27807 default:
27808 return NULL_TREE;
27809 }
27810
27811 return NULL_TREE;
27812 }
27813
27814 /* Return the decl of a target-specific builtin that implements the
27815 reciprocal of the function, or NULL_TREE if not available.  */
27816
27817 static tree
27818 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
27819 bool sqrt ATTRIBUTE_UNUSED)
27820 {
27821 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
27822 && flag_finite_math_only && !flag_trapping_math
27823 && flag_unsafe_math_optimizations))
27824 return NULL_TREE;
27825
27826 if (md_fn)
27827 /* Machine dependent builtins. */
27828 switch (fn)
27829 {
27830 /* Vectorized version of sqrt to rsqrt conversion. */
27831 case IX86_BUILTIN_SQRTPS_NR:
27832 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
27833
27834 case IX86_BUILTIN_SQRTPS_NR256:
27835 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
27836
27837 default:
27838 return NULL_TREE;
27839 }
27840 else
27841 /* Normal builtins. */
27842 switch (fn)
27843 {
27844 /* Sqrt to rsqrt conversion. */
27845 case BUILT_IN_SQRTF:
27846 return ix86_builtins[IX86_BUILTIN_RSQRTF];
27847
27848 default:
27849 return NULL_TREE;
27850 }
27851 }
27852 \f
27853 /* Helper for avx_vpermilps256_operand et al. This is also used by
27854 the expansion functions to turn the parallel back into a mask.
27855 The return value is 0 for no match and the imm8+1 for a match. */
27856
27857 int
27858 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27859 {
27860 unsigned i, nelt = GET_MODE_NUNITS (mode);
27861 unsigned mask = 0;
27862 unsigned char ipar[8];
27863
27864 if (XVECLEN (par, 0) != (int) nelt)
27865 return 0;
27866
27867 /* Validate that all of the elements are constants, and not totally
27868 out of range. Copy the data into an integral array to make the
27869 subsequent checks easier. */
27870 for (i = 0; i < nelt; ++i)
27871 {
27872 rtx er = XVECEXP (par, 0, i);
27873 unsigned HOST_WIDE_INT ei;
27874
27875 if (!CONST_INT_P (er))
27876 return 0;
27877 ei = INTVAL (er);
27878 if (ei >= nelt)
27879 return 0;
27880 ipar[i] = ei;
27881 }
27882
27883 switch (mode)
27884 {
27885 case V4DFmode:
27886 /* In the 256-bit DFmode case, we can only move elements within
27887 a 128-bit lane. */
27888 for (i = 0; i < 2; ++i)
27889 {
27890 if (ipar[i] >= 2)
27891 return 0;
27892 mask |= ipar[i] << i;
27893 }
27894 for (i = 2; i < 4; ++i)
27895 {
27896 if (ipar[i] < 2)
27897 return 0;
27898 mask |= (ipar[i] - 2) << i;
27899 }
27900 break;
27901
27902 case V8SFmode:
27903 /* In the 256-bit SFmode case, we have full freedom of movement
27904 within the low 128-bit lane, but the high 128-bit lane must
27905 mirror the exact same pattern. */
27906 for (i = 0; i < 4; ++i)
27907 if (ipar[i] + 4 != ipar[i + 4])
27908 return 0;
27909 nelt = 4;
27910 /* FALLTHRU */
27911
27912 case V2DFmode:
27913 case V4SFmode:
27914 /* In the 128-bit case, we've full freedom in the placement of
27915 the elements from the source operand. */
27916 for (i = 0; i < nelt; ++i)
27917 mask |= ipar[i] << (i * (nelt / 2));
27918 break;
27919
27920 default:
27921 gcc_unreachable ();
27922 }
27923
27924 /* Make sure success has a non-zero value by adding one. */
27925 return mask + 1;
27926 }
27927
27928 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
27929 the expansion functions to turn the parallel back into a mask.
27930 The return value is 0 for no match and the imm8+1 for a match. */
27931
27932 int
27933 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
27934 {
27935 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
27936 unsigned mask = 0;
27937 unsigned char ipar[8];
27938
27939 if (XVECLEN (par, 0) != (int) nelt)
27940 return 0;
27941
27942 /* Validate that all of the elements are constants, and not totally
27943 out of range. Copy the data into an integral array to make the
27944 subsequent checks easier. */
27945 for (i = 0; i < nelt; ++i)
27946 {
27947 rtx er = XVECEXP (par, 0, i);
27948 unsigned HOST_WIDE_INT ei;
27949
27950 if (!CONST_INT_P (er))
27951 return 0;
27952 ei = INTVAL (er);
27953 if (ei >= 2 * nelt)
27954 return 0;
27955 ipar[i] = ei;
27956 }
27957
27958 /* Validate that each half of the permute consists of consecutive elements.  */
27959 for (i = 0; i < nelt2 - 1; ++i)
27960 if (ipar[i] + 1 != ipar[i + 1])
27961 return 0;
27962 for (i = nelt2; i < nelt - 1; ++i)
27963 if (ipar[i] + 1 != ipar[i + 1])
27964 return 0;
27965
27966 /* Reconstruct the mask. */
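/* Each destination half must select a whole half of one of the two source
   operands; the 2-bit selector for destination half I lands at bit 4*I of
   the imm8.  */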
27967 for (i = 0; i < 2; ++i)
27968 {
27969 unsigned e = ipar[i * nelt2];
27970 if (e % nelt2)
27971 return 0;
27972 e /= nelt2;
27973 mask |= e << (i * 4);
27974 }
27975
27976 /* Make sure success has a non-zero value by adding one. */
27977 return mask + 1;
27978 }
27979 \f
27980
27981 /* Store OPERAND to the memory after reload is completed. This means
27982 that we can't easily use assign_stack_local. */
27983 rtx
27984 ix86_force_to_memory (enum machine_mode mode, rtx operand)
27985 {
27986 rtx result;
27987
27988 gcc_assert (reload_completed);
27989 if (ix86_using_red_zone ())
27990 {
27991 result = gen_rtx_MEM (mode,
27992 gen_rtx_PLUS (Pmode,
27993 stack_pointer_rtx,
27994 GEN_INT (-RED_ZONE_SIZE)));
27995 emit_move_insn (result, operand);
27996 }
27997 else if (TARGET_64BIT)
27998 {
27999 switch (mode)
28000 {
28001 case HImode:
28002 case SImode:
28003 operand = gen_lowpart (DImode, operand);
28004 /* FALLTHRU */
28005 case DImode:
28006 emit_insn (
28007 gen_rtx_SET (VOIDmode,
28008 gen_rtx_MEM (DImode,
28009 gen_rtx_PRE_DEC (DImode,
28010 stack_pointer_rtx)),
28011 operand));
28012 break;
28013 default:
28014 gcc_unreachable ();
28015 }
28016 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28017 }
28018 else
28019 {
28020 switch (mode)
28021 {
28022 case DImode:
28023 {
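/* There is no 64-bit push on 32-bit targets: split the value and push the
   high word first, so the two SImode pushes leave the low word at the
   lower address (little-endian layout).  */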
28024 rtx operands[2];
28025 split_double_mode (mode, &operand, 1, operands, operands + 1);
28026 emit_insn (
28027 gen_rtx_SET (VOIDmode,
28028 gen_rtx_MEM (SImode,
28029 gen_rtx_PRE_DEC (Pmode,
28030 stack_pointer_rtx)),
28031 operands[1]));
28032 emit_insn (
28033 gen_rtx_SET (VOIDmode,
28034 gen_rtx_MEM (SImode,
28035 gen_rtx_PRE_DEC (Pmode,
28036 stack_pointer_rtx)),
28037 operands[0]));
28038 }
28039 break;
28040 case HImode:
28041 /* Store HImodes as SImodes. */
28042 operand = gen_lowpart (SImode, operand);
28043 /* FALLTHRU */
28044 case SImode:
28045 emit_insn (
28046 gen_rtx_SET (VOIDmode,
28047 gen_rtx_MEM (GET_MODE (operand),
28048 gen_rtx_PRE_DEC (SImode,
28049 stack_pointer_rtx)),
28050 operand));
28051 break;
28052 default:
28053 gcc_unreachable ();
28054 }
28055 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28056 }
28057 return result;
28058 }
28059
28060 /* Free operand from the memory. */
28061 void
28062 ix86_free_from_memory (enum machine_mode mode)
28063 {
28064 if (!ix86_using_red_zone ())
28065 {
28066 int size;
28067
28068 if (mode == DImode || TARGET_64BIT)
28069 size = 8;
28070 else
28071 size = 4;
28072 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28073 to pop or add instruction if registers are available. */
28074 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28075 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28076 GEN_INT (size))));
28077 }
28078 }
28079
28080 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28081
28082 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28083 QImode must go into class Q_REGS.
28084 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28085 movdf to do mem-to-mem moves through integer regs. */
28086
28087 static reg_class_t
28088 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28089 {
28090 enum machine_mode mode = GET_MODE (x);
28091
28092 /* We're only allowed to return a subclass of CLASS. Many of the
28093 following checks fail for NO_REGS, so eliminate that early. */
28094 if (regclass == NO_REGS)
28095 return NO_REGS;
28096
28097 /* All classes can load zeros. */
28098 if (x == CONST0_RTX (mode))
28099 return regclass;
28100
28101 /* Force constants into memory if we are loading a (nonzero) constant into
28102 an MMX or SSE register. This is because there are no MMX/SSE instructions
28103 to load from a constant. */
28104 if (CONSTANT_P (x)
28105 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28106 return NO_REGS;
28107
28108 /* Prefer SSE regs only, if we can use them for math. */
28109 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28110 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28111
28112 /* Floating-point constants need more complex checks. */
28113 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28114 {
28115 /* General regs can load everything. */
28116 if (reg_class_subset_p (regclass, GENERAL_REGS))
28117 return regclass;
28118
28119 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28120 zero above. We only want to wind up preferring 80387 registers if
28121 we plan on doing computation with them. */
28122 if (TARGET_80387
28123 && standard_80387_constant_p (x) > 0)
28124 {
28125 /* Limit class to non-sse. */
28126 if (regclass == FLOAT_SSE_REGS)
28127 return FLOAT_REGS;
28128 if (regclass == FP_TOP_SSE_REGS)
28129 return FP_TOP_REG;
28130 if (regclass == FP_SECOND_SSE_REGS)
28131 return FP_SECOND_REG;
28132 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28133 return regclass;
28134 }
28135
28136 return NO_REGS;
28137 }
28138
28139 /* Generally when we see PLUS here, it's the function invariant
28140 (plus soft-fp const_int), which can only be computed into general
28141 regs.  */
28142 if (GET_CODE (x) == PLUS)
28143 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28144
28145 /* QImode constants are easy to load, but non-constant QImode data
28146 must go into Q_REGS. */
28147 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28148 {
28149 if (reg_class_subset_p (regclass, Q_REGS))
28150 return regclass;
28151 if (reg_class_subset_p (Q_REGS, regclass))
28152 return Q_REGS;
28153 return NO_REGS;
28154 }
28155
28156 return regclass;
28157 }
28158
28159 /* Discourage putting floating-point values in SSE registers unless
28160 SSE math is being used, and likewise for the 387 registers. */
28161 static reg_class_t
28162 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28163 {
28164 enum machine_mode mode = GET_MODE (x);
28165
28166 /* Restrict the output reload class to the register bank that we are doing
28167 math on. If we would like not to return a subset of CLASS, reject this
28168 alternative: if reload cannot do this, it will still use its choice. */
28170 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28171 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28172
28173 if (X87_FLOAT_MODE_P (mode))
28174 {
28175 if (regclass == FP_TOP_SSE_REGS)
28176 return FP_TOP_REG;
28177 else if (regclass == FP_SECOND_SSE_REGS)
28178 return FP_SECOND_REG;
28179 else
28180 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28181 }
28182
28183 return regclass;
28184 }
28185
28186 static reg_class_t
28187 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28188 enum machine_mode mode,
28189 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28190 {
28191 /* QImode spills from non-QI registers require an
28192 intermediate register on 32-bit targets.  */
28193 if (!TARGET_64BIT
28194 && !in_p && mode == QImode
28195 && (rclass == GENERAL_REGS
28196 || rclass == LEGACY_REGS
28197 || rclass == INDEX_REGS))
28198 {
28199 int regno;
28200
28201 if (REG_P (x))
28202 regno = REGNO (x);
28203 else
28204 regno = -1;
28205
28206 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28207 regno = true_regnum (x);
28208
28209 /* Return Q_REGS if the operand is in memory. */
28210 if (regno == -1)
28211 return Q_REGS;
28212 }
28213
28214 /* This condition handles the corner case where an expression involving
28215 pointers gets vectorized. We're trying to use the address of a
28216 stack slot as a vector initializer.
28217
28218 (set (reg:V2DI 74 [ vect_cst_.2 ])
28219 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28220
28221 Eventually frame gets turned into sp+offset like this:
28222
28223 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28224 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28225 (const_int 392 [0x188]))))
28226
28227 That later gets turned into:
28228
28229 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28230 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28231 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28232
28233 We'll have the following reload recorded:
28234
28235 Reload 0: reload_in (DI) =
28236 (plus:DI (reg/f:DI 7 sp)
28237 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28238 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28239 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28240 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28241 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28242 reload_reg_rtx: (reg:V2DI 22 xmm1)
28243
28244 Which isn't going to work since SSE instructions can't handle scalar
28245 additions. Returning GENERAL_REGS forces the addition into integer
28246 register and reload can handle subsequent reloads without problems. */
28247
28248 if (in_p && GET_CODE (x) == PLUS
28249 && SSE_CLASS_P (rclass)
28250 && SCALAR_INT_MODE_P (mode))
28251 return GENERAL_REGS;
28252
28253 return NO_REGS;
28254 }
28255
28256 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28257
28258 static bool
28259 ix86_class_likely_spilled_p (reg_class_t rclass)
28260 {
28261 switch (rclass)
28262 {
28263 case AREG:
28264 case DREG:
28265 case CREG:
28266 case BREG:
28267 case AD_REGS:
28268 case SIREG:
28269 case DIREG:
28270 case SSE_FIRST_REG:
28271 case FP_TOP_REG:
28272 case FP_SECOND_REG:
28273 return true;
28274
28275 default:
28276 break;
28277 }
28278
28279 return false;
28280 }
28281
28282 /* If we are copying between general and FP registers, we need a memory
28283 location. The same is true for SSE and MMX registers.
28284
28285 To optimize register_move_cost performance, allow inline variant.
28286
28287 The macro can't work reliably when one of the CLASSES is a class containing
28288 registers from multiple units (SSE, MMX, integer).  We avoid this by never
28289 combining those units in a single alternative in the machine description.
28290 Ensure that this constraint holds to avoid unexpected surprises.
28291
28292 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28293 enforce these sanity checks. */
28294
28295 static inline bool
28296 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28297 enum machine_mode mode, int strict)
28298 {
28299 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28300 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28301 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28302 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28303 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28304 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28305 {
28306 gcc_assert (!strict);
28307 return true;
28308 }
28309
28310 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28311 return true;
28312
28313 /* ??? This is a lie. We do have moves between mmx/general, and for
28314 mmx/sse2. But by saying we need secondary memory we discourage the
28315 register allocator from using the mmx registers unless needed. */
28316 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28317 return true;
28318
28319 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28320 {
28321 /* SSE1 doesn't have any direct moves from other classes. */
28322 if (!TARGET_SSE2)
28323 return true;
28324
28325 /* If the target says that inter-unit moves are more expensive
28326 than moving through memory, then don't generate them. */
28327 if (!TARGET_INTER_UNIT_MOVES)
28328 return true;
28329
28330 /* Between SSE and general, we have moves no larger than word size. */
28331 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28332 return true;
28333 }
28334
28335 return false;
28336 }
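/* For example, with SSE1 but not SSE2 enabled, a move of SImode data between
   SSE_REGS and GENERAL_REGS reaches the !TARGET_SSE2 check above and is
   forced through memory; likewise any mode wider than a word when
   TARGET_INTER_UNIT_MOVES is disabled.  Illustrative reading of the checks
   above, not an additional constraint.  */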
28337
28338 bool
28339 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28340 enum machine_mode mode, int strict)
28341 {
28342 return inline_secondary_memory_needed (class1, class2, mode, strict);
28343 }
28344
28345 /* Implement the TARGET_CLASS_MAX_NREGS hook.
28346
28347 On the 80386, this is the size of MODE in words,
28348 except in the FP regs, where a single reg is always enough. */
28349
28350 static unsigned char
28351 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
28352 {
28353 if (MAYBE_INTEGER_CLASS_P (rclass))
28354 {
28355 if (mode == XFmode)
28356 return (TARGET_64BIT ? 2 : 3);
28357 else if (mode == XCmode)
28358 return (TARGET_64BIT ? 4 : 6);
28359 else
28360 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
28361 }
28362 else
28363 {
28364 if (COMPLEX_MODE_P (mode))
28365 return 2;
28366 else
28367 return 1;
28368 }
28369 }
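/* Illustrative values implied by the code above: XFmode needs 3 registers in
   the integer classes on ia32 (2 in 64-bit mode) and XCmode twice that, while
   any mode held in x87, SSE or MMX registers needs just one register (two for
   complex modes).  */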
28370
28371 /* Return true if the registers in CLASS cannot represent the change from
28372 modes FROM to TO. */
28373
28374 bool
28375 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28376 enum reg_class regclass)
28377 {
28378 if (from == to)
28379 return false;
28380
28381 /* x87 registers can't do subreg at all, as all values are reformatted
28382 to extended precision. */
28383 if (MAYBE_FLOAT_CLASS_P (regclass))
28384 return true;
28385
28386 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28387 {
28388 /* Vector registers do not support QI or HImode loads. If we don't
28389 disallow a change to these modes, reload will assume it's ok to
28390 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28391 the vec_dupv4hi pattern. */
28392 if (GET_MODE_SIZE (from) < 4)
28393 return true;
28394
28395 /* Vector registers do not support subreg with nonzero offsets, which
28396 are otherwise valid for integer registers. Since we can't see
28397 whether we have a nonzero offset from here, prohibit all
28398 nonparadoxical subregs changing size. */
28399 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28400 return true;
28401 }
28402
28403 return false;
28404 }
28405
28406 /* Return the cost of moving data of mode M between a
28407 register and memory. A value of 2 is the default; this cost is
28408 relative to those in `REGISTER_MOVE_COST'.
28409
28410 This function is used extensively by register_move_cost, which is used to
28411 build tables at startup. Make it inline in this case.
28412 When IN is 2, return maximum of in and out move cost.
28413
28414 If moving between registers and memory is more expensive than
28415 between two registers, you should define this macro to express the
28416 relative cost.
28417
28418 Also model the increased cost of moving QImode registers in non
28419 Q_REGS classes.
28420 */
28421 static inline int
28422 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28423 int in)
28424 {
28425 int cost;
28426 if (FLOAT_CLASS_P (regclass))
28427 {
28428 int index;
28429 switch (mode)
28430 {
28431 case SFmode:
28432 index = 0;
28433 break;
28434 case DFmode:
28435 index = 1;
28436 break;
28437 case XFmode:
28438 index = 2;
28439 break;
28440 default:
28441 return 100;
28442 }
28443 if (in == 2)
28444 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28445 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28446 }
28447 if (SSE_CLASS_P (regclass))
28448 {
28449 int index;
28450 switch (GET_MODE_SIZE (mode))
28451 {
28452 case 4:
28453 index = 0;
28454 break;
28455 case 8:
28456 index = 1;
28457 break;
28458 case 16:
28459 index = 2;
28460 break;
28461 default:
28462 return 100;
28463 }
28464 if (in == 2)
28465 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28466 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28467 }
28468 if (MMX_CLASS_P (regclass))
28469 {
28470 int index;
28471 switch (GET_MODE_SIZE (mode))
28472 {
28473 case 4:
28474 index = 0;
28475 break;
28476 case 8:
28477 index = 1;
28478 break;
28479 default:
28480 return 100;
28481 }
28482 if (in == 2)
28483 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28484 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28485 }
28486 switch (GET_MODE_SIZE (mode))
28487 {
28488 case 1:
28489 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28490 {
28491 if (!in)
28492 return ix86_cost->int_store[0];
28493 if (TARGET_PARTIAL_REG_DEPENDENCY
28494 && optimize_function_for_speed_p (cfun))
28495 cost = ix86_cost->movzbl_load;
28496 else
28497 cost = ix86_cost->int_load[0];
28498 if (in == 2)
28499 return MAX (cost, ix86_cost->int_store[0]);
28500 return cost;
28501 }
28502 else
28503 {
28504 if (in == 2)
28505 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28506 if (in)
28507 return ix86_cost->movzbl_load;
28508 else
28509 return ix86_cost->int_store[0] + 4;
28510 }
28511 break;
28512 case 2:
28513 if (in == 2)
28514 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28515 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28516 default:
28517 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
28518 if (mode == TFmode)
28519 mode = XFmode;
28520 if (in == 2)
28521 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28522 else if (in)
28523 cost = ix86_cost->int_load[2];
28524 else
28525 cost = ix86_cost->int_store[2];
28526 return (cost * (((int) GET_MODE_SIZE (mode)
28527 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28528 }
28529 }
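/* Worked example of the logic above: for DFmode in FLOAT_CLASS with IN == 2
   the result is MAX (fp_load[1], fp_store[1]); for a TImode value in the
   integer classes the per-word cost int_load[2]/int_store[2] is scaled by the
   number of words, i.e. multiplied by 4 on ia32.  Example only; the actual
   numbers come from the active cost table.  */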
28530
28531 static int
28532 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28533 bool in)
28534 {
28535 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28536 }
28537
28538
28539 /* Return the cost of moving data from a register in class CLASS1 to
28540 one in class CLASS2.
28541
28542 It is not required that the cost always equal 2 when FROM is the same as TO;
28543 on some machines it is expensive to move between registers if they are not
28544 general registers. */
28545
28546 static int
28547 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28548 reg_class_t class2_i)
28549 {
28550 enum reg_class class1 = (enum reg_class) class1_i;
28551 enum reg_class class2 = (enum reg_class) class2_i;
28552
28553 /* In case we require secondary memory, compute cost of the store followed
28554 by load. In order to avoid bad register allocation choices, we need
28555 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28556
28557 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28558 {
28559 int cost = 1;
28560
28561 cost += inline_memory_move_cost (mode, class1, 2);
28562 cost += inline_memory_move_cost (mode, class2, 2);
28563
28564 /* When copying from a general purpose register we may emit multiple
28565 stores followed by a single load, causing a memory size mismatch stall.
28566 Count this as an arbitrarily high cost of 20. */
28567 if (targetm.class_max_nregs (class1, mode)
28568 > targetm.class_max_nregs (class2, mode))
28569 cost += 20;
28570
28571 /* In the case of FP/MMX moves, the registers actually overlap, and we
28572 have to switch modes in order to treat them differently. */
28573 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28574 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28575 cost += 20;
28576
28577 return cost;
28578 }
28579
28580 /* Moves between SSE/MMX and integer unit are expensive. */
28581 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28582 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28583
28584 /* ??? By keeping the returned value relatively high, we limit the number
28585 of moves between integer and MMX/SSE registers for all targets.
28586 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
28587 where integer modes in MMX/SSE registers are not tieable
28588 because of missing QImode and HImode moves to, from or between
28589 MMX/SSE registers. */
28590 return MAX (8, ix86_cost->mmxsse_to_integer);
28591
28592 if (MAYBE_FLOAT_CLASS_P (class1))
28593 return ix86_cost->fp_move;
28594 if (MAYBE_SSE_CLASS_P (class1))
28595 return ix86_cost->sse_move;
28596 if (MAYBE_MMX_CLASS_P (class1))
28597 return ix86_cost->mmx_move;
28598 return 2;
28599 }
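/* For instance, copying DFmode between FLOAT_REGS and SSE_REGS takes the
   secondary-memory path above: 1 plus the symmetric memory-move cost for each
   class, so the allocator strongly prefers keeping such values within a
   single unit.  Illustrative trace of the code above.  */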
28600
28601 /* Return TRUE if hard register REGNO can hold a value of machine-mode
28602 MODE. */
28603
28604 bool
28605 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28606 {
28607 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
28608 if (CC_REGNO_P (regno))
28609 return GET_MODE_CLASS (mode) == MODE_CC;
28610 if (GET_MODE_CLASS (mode) == MODE_CC
28611 || GET_MODE_CLASS (mode) == MODE_RANDOM
28612 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28613 return false;
28614 if (FP_REGNO_P (regno))
28615 return VALID_FP_MODE_P (mode);
28616 if (SSE_REGNO_P (regno))
28617 {
28618 /* We implement the move patterns for all vector modes into and
28619 out of SSE registers, even when no operation instructions
28620 are available. OImode move is available only when AVX is
28621 enabled. */
28622 return ((TARGET_AVX && mode == OImode)
28623 || VALID_AVX256_REG_MODE (mode)
28624 || VALID_SSE_REG_MODE (mode)
28625 || VALID_SSE2_REG_MODE (mode)
28626 || VALID_MMX_REG_MODE (mode)
28627 || VALID_MMX_REG_MODE_3DNOW (mode));
28628 }
28629 if (MMX_REGNO_P (regno))
28630 {
28631 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28632 so if the register is available at all, then we can move data of
28633 the given mode into or out of it. */
28634 return (VALID_MMX_REG_MODE (mode)
28635 || VALID_MMX_REG_MODE_3DNOW (mode));
28636 }
28637
28638 if (mode == QImode)
28639 {
28640 /* Take care for QImode values - they can be in non-QI regs,
28641 but then they do cause partial register stalls. */
28642 if (regno <= BX_REG || TARGET_64BIT)
28643 return true;
28644 if (!TARGET_PARTIAL_REG_STALL)
28645 return true;
28646 return !can_create_pseudo_p ();
28647 }
28648 /* We handle both integer and floats in the general purpose registers. */
28649 else if (VALID_INT_MODE_P (mode))
28650 return true;
28651 else if (VALID_FP_MODE_P (mode))
28652 return true;
28653 else if (VALID_DFP_MODE_P (mode))
28654 return true;
28655 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28656 on to use that value in smaller contexts, this can easily force a
28657 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28658 supporting DImode, allow it. */
28659 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28660 return true;
28661
28662 return false;
28663 }
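/* A few cases implied by the checks above: the flags register accepts only
   CCmode; an SSE register accepts the SSE/AVX vector modes (OImode only with
   AVX enabled); the a/b/c/d registers always accept QImode, while other
   integer registers accept it on ia32 only when partial register stalls are
   not being avoided.  Illustrative summary, not additional logic.  */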
28664
28665 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28666 tieable integer mode. */
28667
28668 static bool
28669 ix86_tieable_integer_mode_p (enum machine_mode mode)
28670 {
28671 switch (mode)
28672 {
28673 case HImode:
28674 case SImode:
28675 return true;
28676
28677 case QImode:
28678 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28679
28680 case DImode:
28681 return TARGET_64BIT;
28682
28683 default:
28684 return false;
28685 }
28686 }
28687
28688 /* Return true if MODE1 is accessible in a register that can hold MODE2
28689 without copying. That is, all register classes that can hold MODE2
28690 can also hold MODE1. */
28691
28692 bool
28693 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28694 {
28695 if (mode1 == mode2)
28696 return true;
28697
28698 if (ix86_tieable_integer_mode_p (mode1)
28699 && ix86_tieable_integer_mode_p (mode2))
28700 return true;
28701
28702 /* MODE2 being XFmode implies fp stack or general regs, which means we
28703 can tie any smaller floating point modes to it. Note that we do not
28704 tie this with TFmode. */
28705 if (mode2 == XFmode)
28706 return mode1 == SFmode || mode1 == DFmode;
28707
28708 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28709 that we can tie it with SFmode. */
28710 if (mode2 == DFmode)
28711 return mode1 == SFmode;
28712
28713 /* If MODE2 is only appropriate for an SSE register, then tie with
28714 any other mode acceptable to SSE registers. */
28715 if (GET_MODE_SIZE (mode2) == 16
28716 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28717 return (GET_MODE_SIZE (mode1) == 16
28718 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28719
28720 /* If MODE2 is appropriate for an MMX register, then tie
28721 with any other mode acceptable to MMX registers. */
28722 if (GET_MODE_SIZE (mode2) == 8
28723 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28724 return (GET_MODE_SIZE (mode1) == 8
28725 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
28726
28727 return false;
28728 }
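/* Example of the asymmetry above: ix86_modes_tieable_p (SFmode, DFmode) is
   true, since every register that can hold DFmode can also hold SFmode, but
   the reverse query is false because the converse does not hold (e.g. a
   single 32-bit integer register on ia32).  Illustrative note only.  */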
28729
28730 /* Compute a (partial) cost for rtx X. Return true if the complete
28731 cost has been computed, and false if subexpressions should be
28732 scanned. In either case, *TOTAL contains the cost result. */
28733
28734 static bool
28735 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
28736 {
28737 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
28738 enum machine_mode mode = GET_MODE (x);
28739 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
28740
28741 switch (code)
28742 {
28743 case CONST_INT:
28744 case CONST:
28745 case LABEL_REF:
28746 case SYMBOL_REF:
28747 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
28748 *total = 3;
28749 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
28750 *total = 2;
28751 else if (flag_pic && SYMBOLIC_CONST (x)
28752 && (!TARGET_64BIT
28753 || (GET_CODE (x) != LABEL_REF
28754 && (GET_CODE (x) != SYMBOL_REF
28755 || !SYMBOL_REF_LOCAL_P (x)))))
28756 *total = 1;
28757 else
28758 *total = 0;
28759 return true;
28760
28761 case CONST_DOUBLE:
28762 if (mode == VOIDmode)
28763 *total = 0;
28764 else
28765 switch (standard_80387_constant_p (x))
28766 {
28767 case 1: /* 0.0 */
28768 *total = 1;
28769 break;
28770 default: /* Other constants */
28771 *total = 2;
28772 break;
28773 case 0:
28774 case -1:
28775 /* Start with (MEM (SYMBOL_REF)), since that's where
28776 it'll probably end up. Add a penalty for size. */
28777 *total = (COSTS_N_INSNS (1)
28778 + (flag_pic != 0 && !TARGET_64BIT)
28779 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
28780 break;
28781 }
28782 return true;
28783
28784 case ZERO_EXTEND:
28785 /* The zero extension is often completely free on x86_64, so make
28786 it as cheap as possible. */
28787 if (TARGET_64BIT && mode == DImode
28788 && GET_MODE (XEXP (x, 0)) == SImode)
28789 *total = 1;
28790 else if (TARGET_ZERO_EXTEND_WITH_AND)
28791 *total = cost->add;
28792 else
28793 *total = cost->movzx;
28794 return false;
28795
28796 case SIGN_EXTEND:
28797 *total = cost->movsx;
28798 return false;
28799
28800 case ASHIFT:
28801 if (CONST_INT_P (XEXP (x, 1))
28802 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
28803 {
28804 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28805 if (value == 1)
28806 {
28807 *total = cost->add;
28808 return false;
28809 }
28810 if ((value == 2 || value == 3)
28811 && cost->lea <= cost->shift_const)
28812 {
28813 *total = cost->lea;
28814 return false;
28815 }
28816 }
28817 /* FALLTHRU */
28818
28819 case ROTATE:
28820 case ASHIFTRT:
28821 case LSHIFTRT:
28822 case ROTATERT:
28823 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
28824 {
28825 if (CONST_INT_P (XEXP (x, 1)))
28826 {
28827 if (INTVAL (XEXP (x, 1)) > 32)
28828 *total = cost->shift_const + COSTS_N_INSNS (2);
28829 else
28830 *total = cost->shift_const * 2;
28831 }
28832 else
28833 {
28834 if (GET_CODE (XEXP (x, 1)) == AND)
28835 *total = cost->shift_var * 2;
28836 else
28837 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
28838 }
28839 }
28840 else
28841 {
28842 if (CONST_INT_P (XEXP (x, 1)))
28843 *total = cost->shift_const;
28844 else
28845 *total = cost->shift_var;
28846 }
28847 return false;
28848
28849 case FMA:
28850 {
28851 rtx sub;
28852
28853 gcc_assert (FLOAT_MODE_P (mode));
28854 gcc_assert (TARGET_FMA || TARGET_FMA4);
28855
28856 /* ??? SSE scalar/vector cost should be used here. */
28857 /* ??? Bald assumption that fma has the same cost as fmul. */
28858 *total = cost->fmul;
28859 *total += rtx_cost (XEXP (x, 1), FMA, speed);
28860
28861 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
28862 sub = XEXP (x, 0);
28863 if (GET_CODE (sub) == NEG)
28864 sub = XEXP (sub, 0);
28865 *total += rtx_cost (sub, FMA, speed);
28866
28867 sub = XEXP (x, 2);
28868 if (GET_CODE (sub) == NEG)
28869 sub = XEXP (sub, 0);
28870 *total += rtx_cost (sub, FMA, speed);
28871 return true;
28872 }
28873
28874 case MULT:
28875 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28876 {
28877 /* ??? SSE scalar cost should be used here. */
28878 *total = cost->fmul;
28879 return false;
28880 }
28881 else if (X87_FLOAT_MODE_P (mode))
28882 {
28883 *total = cost->fmul;
28884 return false;
28885 }
28886 else if (FLOAT_MODE_P (mode))
28887 {
28888 /* ??? SSE vector cost should be used here. */
28889 *total = cost->fmul;
28890 return false;
28891 }
28892 else
28893 {
28894 rtx op0 = XEXP (x, 0);
28895 rtx op1 = XEXP (x, 1);
28896 int nbits;
28897 if (CONST_INT_P (XEXP (x, 1)))
28898 {
28899 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28900 for (nbits = 0; value != 0; value &= value - 1)
28901 nbits++;
28902 }
28903 else
28904 /* This is arbitrary. */
28905 nbits = 7;
28906
28907 /* Compute costs correctly for widening multiplication. */
28908 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
28909 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
28910 == GET_MODE_SIZE (mode))
28911 {
28912 int is_mulwiden = 0;
28913 enum machine_mode inner_mode = GET_MODE (op0);
28914
28915 if (GET_CODE (op0) == GET_CODE (op1))
28916 is_mulwiden = 1, op1 = XEXP (op1, 0);
28917 else if (CONST_INT_P (op1))
28918 {
28919 if (GET_CODE (op0) == SIGN_EXTEND)
28920 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
28921 == INTVAL (op1);
28922 else
28923 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
28924 }
28925
28926 if (is_mulwiden)
28927 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
28928 }
28929
28930 *total = (cost->mult_init[MODE_INDEX (mode)]
28931 + nbits * cost->mult_bit
28932 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
28933
28934 return true;
28935 }
28936
28937 case DIV:
28938 case UDIV:
28939 case MOD:
28940 case UMOD:
28941 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28942 /* ??? SSE cost should be used here. */
28943 *total = cost->fdiv;
28944 else if (X87_FLOAT_MODE_P (mode))
28945 *total = cost->fdiv;
28946 else if (FLOAT_MODE_P (mode))
28947 /* ??? SSE vector cost should be used here. */
28948 *total = cost->fdiv;
28949 else
28950 *total = cost->divide[MODE_INDEX (mode)];
28951 return false;
28952
28953 case PLUS:
28954 if (GET_MODE_CLASS (mode) == MODE_INT
28955 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
28956 {
28957 if (GET_CODE (XEXP (x, 0)) == PLUS
28958 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
28959 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
28960 && CONSTANT_P (XEXP (x, 1)))
28961 {
28962 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
28963 if (val == 2 || val == 4 || val == 8)
28964 {
28965 *total = cost->lea;
28966 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28967 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
28968 outer_code, speed);
28969 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28970 return true;
28971 }
28972 }
28973 else if (GET_CODE (XEXP (x, 0)) == MULT
28974 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
28975 {
28976 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
28977 if (val == 2 || val == 4 || val == 8)
28978 {
28979 *total = cost->lea;
28980 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28981 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28982 return true;
28983 }
28984 }
28985 else if (GET_CODE (XEXP (x, 0)) == PLUS)
28986 {
28987 *total = cost->lea;
28988 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28989 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28990 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28991 return true;
28992 }
28993 }
28994 /* FALLTHRU */
28995
28996 case MINUS:
28997 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28998 {
28999 /* ??? SSE cost should be used here. */
29000 *total = cost->fadd;
29001 return false;
29002 }
29003 else if (X87_FLOAT_MODE_P (mode))
29004 {
29005 *total = cost->fadd;
29006 return false;
29007 }
29008 else if (FLOAT_MODE_P (mode))
29009 {
29010 /* ??? SSE vector cost should be used here. */
29011 *total = cost->fadd;
29012 return false;
29013 }
29014 /* FALLTHRU */
29015
29016 case AND:
29017 case IOR:
29018 case XOR:
29019 if (!TARGET_64BIT && mode == DImode)
29020 {
29021 *total = (cost->add * 2
29022 + (rtx_cost (XEXP (x, 0), outer_code, speed)
29023 << (GET_MODE (XEXP (x, 0)) != DImode))
29024 + (rtx_cost (XEXP (x, 1), outer_code, speed)
29025 << (GET_MODE (XEXP (x, 1)) != DImode)));
29026 return true;
29027 }
29028 /* FALLTHRU */
29029
29030 case NEG:
29031 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29032 {
29033 /* ??? SSE cost should be used here. */
29034 *total = cost->fchs;
29035 return false;
29036 }
29037 else if (X87_FLOAT_MODE_P (mode))
29038 {
29039 *total = cost->fchs;
29040 return false;
29041 }
29042 else if (FLOAT_MODE_P (mode))
29043 {
29044 /* ??? SSE vector cost should be used here. */
29045 *total = cost->fchs;
29046 return false;
29047 }
29048 /* FALLTHRU */
29049
29050 case NOT:
29051 if (!TARGET_64BIT && mode == DImode)
29052 *total = cost->add * 2;
29053 else
29054 *total = cost->add;
29055 return false;
29056
29057 case COMPARE:
29058 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29059 && XEXP (XEXP (x, 0), 1) == const1_rtx
29060 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29061 && XEXP (x, 1) == const0_rtx)
29062 {
29063 /* This kind of construct is implemented using test[bwl].
29064 Treat it as if we had an AND. */
29065 *total = (cost->add
29066 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
29067 + rtx_cost (const1_rtx, outer_code, speed));
29068 return true;
29069 }
29070 return false;
29071
29072 case FLOAT_EXTEND:
29073 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29074 *total = 0;
29075 return false;
29076
29077 case ABS:
29078 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29079 /* ??? SSE cost should be used here. */
29080 *total = cost->fabs;
29081 else if (X87_FLOAT_MODE_P (mode))
29082 *total = cost->fabs;
29083 else if (FLOAT_MODE_P (mode))
29084 /* ??? SSE vector cost should be used here. */
29085 *total = cost->fabs;
29086 return false;
29087
29088 case SQRT:
29089 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29090 /* ??? SSE cost should be used here. */
29091 *total = cost->fsqrt;
29092 else if (X87_FLOAT_MODE_P (mode))
29093 *total = cost->fsqrt;
29094 else if (FLOAT_MODE_P (mode))
29095 /* ??? SSE vector cost should be used here. */
29096 *total = cost->fsqrt;
29097 return false;
29098
29099 case UNSPEC:
29100 if (XINT (x, 1) == UNSPEC_TP)
29101 *total = 0;
29102 return false;
29103
29104 case VEC_SELECT:
29105 case VEC_CONCAT:
29106 case VEC_MERGE:
29107 case VEC_DUPLICATE:
29108 /* ??? Assume all of these vector manipulation patterns are
29109 recognizable. In which case they all pretty much have the
29110 same cost. */
29111 *total = COSTS_N_INSNS (1);
29112 return true;
29113
29114 default:
29115 return false;
29116 }
29117 }
29118
29119 #if TARGET_MACHO
29120
29121 static int current_machopic_label_num;
29122
29123 /* Given a symbol name and its associated stub, write out the
29124 definition of the stub. */
29125
29126 void
29127 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29128 {
29129 unsigned int length;
29130 char *binder_name, *symbol_name, lazy_ptr_name[32];
29131 int label = ++current_machopic_label_num;
29132
29133 /* For 64-bit we shouldn't get here. */
29134 gcc_assert (!TARGET_64BIT);
29135
29136 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29137 symb = targetm.strip_name_encoding (symb);
29138
29139 length = strlen (stub);
29140 binder_name = XALLOCAVEC (char, length + 32);
29141 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29142
29143 length = strlen (symb);
29144 symbol_name = XALLOCAVEC (char, length + 32);
29145 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29146
29147 sprintf (lazy_ptr_name, "L%d$lz", label);
29148
29149 if (MACHOPIC_ATT_STUB)
29150 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29151 else if (MACHOPIC_PURE)
29152 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29153 else
29154 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29155
29156 fprintf (file, "%s:\n", stub);
29157 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29158
29159 if (MACHOPIC_ATT_STUB)
29160 {
29161 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29162 }
29163 else if (MACHOPIC_PURE)
29164 {
29165 /* PIC stub. */
29166 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29167 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29168 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29169 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
29170 label, lazy_ptr_name, label);
29171 fprintf (file, "\tjmp\t*%%ecx\n");
29172 }
29173 else
29174 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29175
29176 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29177 it needs no stub-binding-helper. */
29178 if (MACHOPIC_ATT_STUB)
29179 return;
29180
29181 fprintf (file, "%s:\n", binder_name);
29182
29183 if (MACHOPIC_PURE)
29184 {
29185 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29186 fprintf (file, "\tpushl\t%%ecx\n");
29187 }
29188 else
29189 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29190
29191 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29192
29193 /* N.B. Keep the correspondence of these
29194 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29195 old-pic/new-pic/non-pic stubs; altering this will break
29196 compatibility with existing dylibs. */
29197 if (MACHOPIC_PURE)
29198 {
29199 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29200 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29201 }
29202 else
29203 /* 16-byte -mdynamic-no-pic stub. */
29204 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
29205
29206 fprintf (file, "%s:\n", lazy_ptr_name);
29207 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29208 fprintf (file, ASM_LONG "%s\n", binder_name);
29209 }
29210 #endif /* TARGET_MACHO */
29211
29212 /* Order the registers for register allocator. */
29213
29214 void
29215 x86_order_regs_for_local_alloc (void)
29216 {
29217 int pos = 0;
29218 int i;
29219
29220 /* First allocate the local general purpose registers. */
29221 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29222 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29223 reg_alloc_order [pos++] = i;
29224
29225 /* Global general purpose registers. */
29226 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29227 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29228 reg_alloc_order [pos++] = i;
29229
29230 /* x87 registers come first in case we are doing FP math
29231 using them. */
29232 if (!TARGET_SSE_MATH)
29233 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29234 reg_alloc_order [pos++] = i;
29235
29236 /* SSE registers. */
29237 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29238 reg_alloc_order [pos++] = i;
29239 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29240 reg_alloc_order [pos++] = i;
29241
29242 /* x87 registers. */
29243 if (TARGET_SSE_MATH)
29244 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29245 reg_alloc_order [pos++] = i;
29246
29247 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29248 reg_alloc_order [pos++] = i;
29249
29250 /* Initialize the rest of the array, as we do not allocate some registers
29251 at all. */
29252 while (pos < FIRST_PSEUDO_REGISTER)
29253 reg_alloc_order [pos++] = 0;
29254 }
29255
29256 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29257 in struct attribute_spec handler. */
29258 static tree
29259 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29260 tree args,
29261 int flags ATTRIBUTE_UNUSED,
29262 bool *no_add_attrs)
29263 {
29264 if (TREE_CODE (*node) != FUNCTION_TYPE
29265 && TREE_CODE (*node) != METHOD_TYPE
29266 && TREE_CODE (*node) != FIELD_DECL
29267 && TREE_CODE (*node) != TYPE_DECL)
29268 {
29269 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29270 name);
29271 *no_add_attrs = true;
29272 return NULL_TREE;
29273 }
29274 if (TARGET_64BIT)
29275 {
29276 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29277 name);
29278 *no_add_attrs = true;
29279 return NULL_TREE;
29280 }
29281 if (is_attribute_p ("callee_pop_aggregate_return", name))
29282 {
29283 tree cst;
29284
29285 cst = TREE_VALUE (args);
29286 if (TREE_CODE (cst) != INTEGER_CST)
29287 {
29288 warning (OPT_Wattributes,
29289 "%qE attribute requires an integer constant argument",
29290 name);
29291 *no_add_attrs = true;
29292 }
29293 else if (compare_tree_int (cst, 0) != 0
29294 && compare_tree_int (cst, 1) != 0)
29295 {
29296 warning (OPT_Wattributes,
29297 "argument to %qE attribute is neither zero, nor one",
29298 name);
29299 *no_add_attrs = true;
29300 }
29301
29302 return NULL_TREE;
29303 }
29304
29305 return NULL_TREE;
29306 }
29307
29308 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
29309 struct attribute_spec.handler. */
29310 static tree
29311 ix86_handle_abi_attribute (tree *node, tree name,
29312 tree args ATTRIBUTE_UNUSED,
29313 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29314 {
29315 if (TREE_CODE (*node) != FUNCTION_TYPE
29316 && TREE_CODE (*node) != METHOD_TYPE
29317 && TREE_CODE (*node) != FIELD_DECL
29318 && TREE_CODE (*node) != TYPE_DECL)
29319 {
29320 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29321 name);
29322 *no_add_attrs = true;
29323 return NULL_TREE;
29324 }
29325
29326 /* Can combine regparm with all attributes but fastcall. */
29327 if (is_attribute_p ("ms_abi", name))
29328 {
29329 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29330 {
29331 error ("ms_abi and sysv_abi attributes are not compatible");
29332 }
29333
29334 return NULL_TREE;
29335 }
29336 else if (is_attribute_p ("sysv_abi", name))
29337 {
29338 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29339 {
29340 error ("ms_abi and sysv_abi attributes are not compatible");
29341 }
29342
29343 return NULL_TREE;
29344 }
29345
29346 return NULL_TREE;
29347 }
29348
29349 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29350 struct attribute_spec.handler. */
29351 static tree
29352 ix86_handle_struct_attribute (tree *node, tree name,
29353 tree args ATTRIBUTE_UNUSED,
29354 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29355 {
29356 tree *type = NULL;
29357 if (DECL_P (*node))
29358 {
29359 if (TREE_CODE (*node) == TYPE_DECL)
29360 type = &TREE_TYPE (*node);
29361 }
29362 else
29363 type = node;
29364
29365 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29366 || TREE_CODE (*type) == UNION_TYPE)))
29367 {
29368 warning (OPT_Wattributes, "%qE attribute ignored",
29369 name);
29370 *no_add_attrs = true;
29371 }
29372
29373 else if ((is_attribute_p ("ms_struct", name)
29374 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29375 || ((is_attribute_p ("gcc_struct", name)
29376 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29377 {
29378 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29379 name);
29380 *no_add_attrs = true;
29381 }
29382
29383 return NULL_TREE;
29384 }
29385
29386 static tree
29387 ix86_handle_fndecl_attribute (tree *node, tree name,
29388 tree args ATTRIBUTE_UNUSED,
29389 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29390 {
29391 if (TREE_CODE (*node) != FUNCTION_DECL)
29392 {
29393 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29394 name);
29395 *no_add_attrs = true;
29396 }
29397 return NULL_TREE;
29398 }
29399
29400 static bool
29401 ix86_ms_bitfield_layout_p (const_tree record_type)
29402 {
29403 return ((TARGET_MS_BITFIELD_LAYOUT
29404 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29405 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29406 }
29407
29408 /* Returns an expression indicating where the this parameter is
29409 located on entry to the FUNCTION. */
29410
29411 static rtx
29412 x86_this_parameter (tree function)
29413 {
29414 tree type = TREE_TYPE (function);
29415 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29416 int nregs;
29417
29418 if (TARGET_64BIT)
29419 {
29420 const int *parm_regs;
29421
29422 if (ix86_function_type_abi (type) == MS_ABI)
29423 parm_regs = x86_64_ms_abi_int_parameter_registers;
29424 else
29425 parm_regs = x86_64_int_parameter_registers;
29426 return gen_rtx_REG (DImode, parm_regs[aggr]);
29427 }
29428
29429 nregs = ix86_function_regparm (type, function);
29430
29431 if (nregs > 0 && !stdarg_p (type))
29432 {
29433 int regno;
29434 unsigned int ccvt = ix86_get_callcvt (type);
29435
29436 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29437 regno = aggr ? DX_REG : CX_REG;
29438 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29439 {
29440 regno = CX_REG;
29441 if (aggr)
29442 return gen_rtx_MEM (SImode,
29443 plus_constant (stack_pointer_rtx, 4));
29444 }
29445 else
29446 {
29447 regno = AX_REG;
29448 if (aggr)
29449 {
29450 regno = DX_REG;
29451 if (nregs == 1)
29452 return gen_rtx_MEM (SImode,
29453 plus_constant (stack_pointer_rtx, 4));
29454 }
29455 }
29456 return gen_rtx_REG (SImode, regno);
29457 }
29458
29459 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29460 }
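/* Illustration of the 32-bit cases above: with fastcall, THIS arrives in %ecx
   (%edx when the return value is an aggregate returned in memory); with
   thiscall it is in %ecx unless an aggregate is returned, in which case it is
   at 4(%esp); without register arguments it is at 4(%esp), or 8(%esp) past a
   hidden aggregate-return pointer.  Summary of the code above, not new
   behaviour.  */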
29461
29462 /* Determine whether x86_output_mi_thunk can succeed. */
29463
29464 static bool
29465 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29466 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29467 HOST_WIDE_INT vcall_offset, const_tree function)
29468 {
29469 /* 64-bit can handle anything. */
29470 if (TARGET_64BIT)
29471 return true;
29472
29473 /* For 32-bit, everything's fine if we have one free register. */
29474 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29475 return true;
29476
29477 /* Need a free register for vcall_offset. */
29478 if (vcall_offset)
29479 return false;
29480
29481 /* Need a free register for GOT references. */
29482 if (flag_pic && !targetm.binds_local_p (function))
29483 return false;
29484
29485 /* Otherwise ok. */
29486 return true;
29487 }
29488
29489 /* Output the assembler code for a thunk function. THUNK_DECL is the
29490 declaration for the thunk function itself, FUNCTION is the decl for
29491 the target function. DELTA is an immediate constant offset to be
29492 added to THIS. If VCALL_OFFSET is nonzero, the word at
29493 *(*this + vcall_offset) should be added to THIS. */
29494
29495 static void
29496 x86_output_mi_thunk (FILE *file,
29497 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29498 HOST_WIDE_INT vcall_offset, tree function)
29499 {
29500 rtx this_param = x86_this_parameter (function);
29501 rtx this_reg, tmp, fnaddr;
29502
29503 emit_note (NOTE_INSN_PROLOGUE_END);
29504
29505 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29506 pull it in now and let DELTA benefit. */
29507 if (REG_P (this_param))
29508 this_reg = this_param;
29509 else if (vcall_offset)
29510 {
29511 /* Put the this parameter into %eax. */
29512 this_reg = gen_rtx_REG (Pmode, AX_REG);
29513 emit_move_insn (this_reg, this_param);
29514 }
29515 else
29516 this_reg = NULL_RTX;
29517
29518 /* Adjust the this parameter by a fixed constant. */
29519 if (delta)
29520 {
29521 rtx delta_rtx = GEN_INT (delta);
29522 rtx delta_dst = this_reg ? this_reg : this_param;
29523
29524 if (TARGET_64BIT)
29525 {
29526 if (!x86_64_general_operand (delta_rtx, Pmode))
29527 {
29528 tmp = gen_rtx_REG (Pmode, R10_REG);
29529 emit_move_insn (tmp, delta_rtx);
29530 delta_rtx = tmp;
29531 }
29532 }
29533
29534 emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx));
29535 }
29536
29537 /* Adjust the this parameter by a value stored in the vtable. */
29538 if (vcall_offset)
29539 {
29540 rtx vcall_addr, vcall_mem, this_mem;
29541 unsigned int tmp_regno;
29542
29543 if (TARGET_64BIT)
29544 tmp_regno = R10_REG;
29545 else
29546 {
29547 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
29548 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
29549 tmp_regno = AX_REG;
29550 else
29551 tmp_regno = CX_REG;
29552 }
29553 tmp = gen_rtx_REG (Pmode, tmp_regno);
29554
29555 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
29556 if (Pmode != ptr_mode)
29557 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
29558 emit_move_insn (tmp, this_mem);
29559
29560 /* Adjust the this parameter. */
29561 vcall_addr = plus_constant (tmp, vcall_offset);
29562 if (TARGET_64BIT
29563 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
29564 {
29565 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
29566 emit_move_insn (tmp2, GEN_INT (vcall_offset));
29567 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
29568 }
29569
29570 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
29571 if (Pmode != ptr_mode)
29572 emit_insn (gen_addsi_1_zext (this_reg,
29573 gen_rtx_REG (ptr_mode,
29574 REGNO (this_reg)),
29575 vcall_mem));
29576 else
29577 emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem));
29578 }
29579
29580 /* If necessary, drop THIS back to its stack slot. */
29581 if (this_reg && this_reg != this_param)
29582 emit_move_insn (this_param, this_reg);
29583
29584 fnaddr = XEXP (DECL_RTL (function), 0);
29585 if (TARGET_64BIT)
29586 {
29587 if (!flag_pic || targetm.binds_local_p (function)
29588 || cfun->machine->call_abi == MS_ABI)
29589 ;
29590 else
29591 {
29592 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
29593 tmp = gen_rtx_CONST (Pmode, tmp);
29594 fnaddr = gen_rtx_MEM (Pmode, tmp);
29595 }
29596 }
29597 else
29598 {
29599 if (!flag_pic || targetm.binds_local_p (function))
29600 ;
29601 #if TARGET_MACHO
29602 else if (TARGET_MACHO)
29603 {
29604 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
29605 fnaddr = XEXP (fnaddr, 0);
29606 }
29607 #endif /* TARGET_MACHO */
29608 else
29609 {
29610 tmp = gen_rtx_REG (Pmode, CX_REG);
29611 output_set_got (tmp, NULL_RTX);
29612
29613 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
29614 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
29615 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
29616 }
29617 }
29618
29619 /* Our sibling call patterns do not allow memories, because we have no
29620 predicate that can distinguish between frame and non-frame memory.
29621 For our purposes here, we can get away with (ab)using a jump pattern,
29622 because we're going to do no optimization. */
29623 if (MEM_P (fnaddr))
29624 emit_jump_insn (gen_indirect_jump (fnaddr));
29625 else
29626 {
29627 tmp = gen_rtx_MEM (QImode, fnaddr);
29628 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
29629 tmp = emit_call_insn (tmp);
29630 SIBLING_CALL_P (tmp) = 1;
29631 }
29632 emit_barrier ();
29633
29634 /* Emit just enough of rest_of_compilation to get the insns emitted.
29635 Note that use_thunk calls assemble_start_function et al. */
29636 tmp = get_insns ();
29637 insn_locators_alloc ();
29638 shorten_branches (tmp);
29639 final_start_function (tmp, file, 1);
29640 final (tmp, file, 1);
29641 final_end_function ();
29642 }
29643
29644 static void
29645 x86_file_start (void)
29646 {
29647 default_file_start ();
29648 #if TARGET_MACHO
29649 darwin_file_start ();
29650 #endif
29651 if (X86_FILE_START_VERSION_DIRECTIVE)
29652 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29653 if (X86_FILE_START_FLTUSED)
29654 fputs ("\t.global\t__fltused\n", asm_out_file);
29655 if (ix86_asm_dialect == ASM_INTEL)
29656 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29657 }
29658
29659 int
29660 x86_field_alignment (tree field, int computed)
29661 {
29662 enum machine_mode mode;
29663 tree type = TREE_TYPE (field);
29664
29665 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29666 return computed;
29667 mode = TYPE_MODE (strip_array_types (type));
29668 if (mode == DFmode || mode == DCmode
29669 || GET_MODE_CLASS (mode) == MODE_INT
29670 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29671 return MIN (32, computed);
29672 return computed;
29673 }
29674
29675 /* Output assembler code to FILE to increment profiler label # LABELNO
29676 for profiling a function entry. */
29677 void
29678 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29679 {
29680 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29681 : MCOUNT_NAME);
29682
29683 if (TARGET_64BIT)
29684 {
29685 #ifndef NO_PROFILE_COUNTERS
29686 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29687 #endif
29688
29689 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29690 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29691 else
29692 fprintf (file, "\tcall\t%s\n", mcount_name);
29693 }
29694 else if (flag_pic)
29695 {
29696 #ifndef NO_PROFILE_COUNTERS
29697 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29698 LPREFIX, labelno);
29699 #endif
29700 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29701 }
29702 else
29703 {
29704 #ifndef NO_PROFILE_COUNTERS
29705 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29706 LPREFIX, labelno);
29707 #endif
29708 fprintf (file, "\tcall\t%s\n", mcount_name);
29709 }
29710 }
29711
29712 /* We don't have exact information about the insn sizes, but we may assume
29713 quite safely that we are informed about all 1 byte insns and memory
29714 address sizes. This is enough to eliminate unnecessary padding in
29715 99% of cases. */
29716
29717 static int
29718 min_insn_size (rtx insn)
29719 {
29720 int l = 0, len;
29721
29722 if (!INSN_P (insn) || !active_insn_p (insn))
29723 return 0;
29724
29725 /* Discard alignments we've emitted, as well as jump table data. */
29726 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
29727 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
29728 return 0;
29729 if (JUMP_TABLE_DATA_P (insn))
29730 return 0;
29731
29732 /* Important case - calls are always 5 bytes.
29733 It is common to have many calls in a row. */
29734 if (CALL_P (insn)
29735 && symbolic_reference_mentioned_p (PATTERN (insn))
29736 && !SIBLING_CALL_P (insn))
29737 return 5;
29738 len = get_attr_length (insn);
29739 if (len <= 1)
29740 return 1;
29741
29742 /* For normal instructions we rely on get_attr_length being exact,
29743 with a few exceptions. */
29744 if (!JUMP_P (insn))
29745 {
29746 enum attr_type type = get_attr_type (insn);
29747
29748 switch (type)
29749 {
29750 case TYPE_MULTI:
29751 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
29752 || asm_noperands (PATTERN (insn)) >= 0)
29753 return 0;
29754 break;
29755 case TYPE_OTHER:
29756 case TYPE_FCMP:
29757 break;
29758 default:
29759 /* Otherwise trust get_attr_length. */
29760 return len;
29761 }
29762
29763 l = get_attr_length_address (insn);
29764 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
29765 l = 4;
29766 }
29767 if (l)
29768 return 1+l;
29769 else
29770 return 2;
29771 }
29772
29773 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29774
29775 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
29776 window. */
29777
29778 static void
29779 ix86_avoid_jump_mispredicts (void)
29780 {
29781 rtx insn, start = get_insns ();
29782 int nbytes = 0, njumps = 0;
29783 int isjump = 0;
29784
29785 /* Look for all minimal intervals of instructions containing 4 jumps.
29786 The intervals are bounded by START and INSN. NBYTES is the total
29787 size of instructions in the interval including INSN and not including
29788 START. When the NBYTES is smaller than 16 bytes, it is possible
29789 that the end of START and INSN ends up in the same 16byte page.
29790
29791 The smallest offset in the page INSN can start is the case where START
29792 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
29793 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
29794 */
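/* Numeric illustration of the formula used below: with NBYTES == 14 and a
   2-byte INSN the pad is 15 - 14 + 2 = 3 bytes, enough that INSN can no
   longer fit entirely in the same 16-byte page as the three preceding jumps
   in the worst case.  Illustrative note on the existing formula.  */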
29795 for (insn = start; insn; insn = NEXT_INSN (insn))
29796 {
29797 int min_size;
29798
29799 if (LABEL_P (insn))
29800 {
29801 int align = label_to_alignment (insn);
29802 int max_skip = label_to_max_skip (insn);
29803
29804 if (max_skip > 15)
29805 max_skip = 15;
29806 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29807 already in the current 16 byte page, because otherwise
29808 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29809 bytes to reach 16 byte boundary. */
29810 if (align <= 0
29811 || (align <= 3 && max_skip != (1 << align) - 1))
29812 max_skip = 0;
29813 if (dump_file)
29814 fprintf (dump_file, "Label %i with max_skip %i\n",
29815 INSN_UID (insn), max_skip);
29816 if (max_skip)
29817 {
29818 while (nbytes + max_skip >= 16)
29819 {
29820 start = NEXT_INSN (start);
29821 if ((JUMP_P (start)
29822 && GET_CODE (PATTERN (start)) != ADDR_VEC
29823 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29824 || CALL_P (start))
29825 njumps--, isjump = 1;
29826 else
29827 isjump = 0;
29828 nbytes -= min_insn_size (start);
29829 }
29830 }
29831 continue;
29832 }
29833
29834 min_size = min_insn_size (insn);
29835 nbytes += min_size;
29836 if (dump_file)
29837 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
29838 INSN_UID (insn), min_size);
29839 if ((JUMP_P (insn)
29840 && GET_CODE (PATTERN (insn)) != ADDR_VEC
29841 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
29842 || CALL_P (insn))
29843 njumps++;
29844 else
29845 continue;
29846
29847 while (njumps > 3)
29848 {
29849 start = NEXT_INSN (start);
29850 if ((JUMP_P (start)
29851 && GET_CODE (PATTERN (start)) != ADDR_VEC
29852 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29853 || CALL_P (start))
29854 njumps--, isjump = 1;
29855 else
29856 isjump = 0;
29857 nbytes -= min_insn_size (start);
29858 }
29859 gcc_assert (njumps >= 0);
29860 if (dump_file)
29861 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
29862 INSN_UID (start), INSN_UID (insn), nbytes);
29863
29864 if (njumps == 3 && isjump && nbytes < 16)
29865 {
29866 int padsize = 15 - nbytes + min_insn_size (insn);
29867
29868 if (dump_file)
29869 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
29870 INSN_UID (insn), padsize);
29871 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
29872 }
29873 }
29874 }
29875 #endif
29876
29877 /* AMD Athlon works faster
29878 when RET is not the destination of a conditional jump or directly preceded
29879 by another jump instruction. We avoid the penalty by inserting a NOP just
29880 before the RET instruction in such cases. */
29881 static void
29882 ix86_pad_returns (void)
29883 {
29884 edge e;
29885 edge_iterator ei;
29886
29887 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29888 {
29889 basic_block bb = e->src;
29890 rtx ret = BB_END (bb);
29891 rtx prev;
29892 bool replace = false;
29893
29894 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
29895 || optimize_bb_for_size_p (bb))
29896 continue;
29897 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
29898 if (active_insn_p (prev) || LABEL_P (prev))
29899 break;
29900 if (prev && LABEL_P (prev))
29901 {
29902 edge e;
29903 edge_iterator ei;
29904
29905 FOR_EACH_EDGE (e, ei, bb->preds)
29906 if (EDGE_FREQUENCY (e) && e->src->index >= 0
29907 && !(e->flags & EDGE_FALLTHRU))
29908 replace = true;
29909 }
29910 if (!replace)
29911 {
29912 prev = prev_active_insn (ret);
29913 if (prev
29914 && ((JUMP_P (prev) && any_condjump_p (prev))
29915 || CALL_P (prev)))
29916 replace = true;
29917 /* Empty functions get a branch mispredict even when
29918 the jump destination is not visible to us. */
29919 if (!prev && !optimize_function_for_size_p (cfun))
29920 replace = true;
29921 }
29922 if (replace)
29923 {
29924 emit_jump_insn_before (gen_return_internal_long (), ret);
29925 delete_insn (ret);
29926 }
29927 }
29928 }
29929
29930 /* Count the minimum number of instructions in BB. Return 4 if the
29931 number of instructions >= 4. */
29932
29933 static int
29934 ix86_count_insn_bb (basic_block bb)
29935 {
29936 rtx insn;
29937 int insn_count = 0;
29938
29939 /* Count number of instructions in this block. Return 4 if the number
29940 of instructions >= 4. */
29941 FOR_BB_INSNS (bb, insn)
29942 {
29943 /* This only happens in exit blocks. */
29944 if (JUMP_P (insn)
29945 && GET_CODE (PATTERN (insn)) == RETURN)
29946 break;
29947
29948 if (NONDEBUG_INSN_P (insn)
29949 && GET_CODE (PATTERN (insn)) != USE
29950 && GET_CODE (PATTERN (insn)) != CLOBBER)
29951 {
29952 insn_count++;
29953 if (insn_count >= 4)
29954 return insn_count;
29955 }
29956 }
29957
29958 return insn_count;
29959 }
29960
29961
29962 /* Count the minimum number of instructions in code path in BB.
29963 Return 4 if the number of instructions >= 4. */
29964
29965 static int
29966 ix86_count_insn (basic_block bb)
29967 {
29968 edge e;
29969 edge_iterator ei;
29970 int min_prev_count;
29971
29972 /* Only bother counting instructions along paths with no
29973 more than 2 basic blocks between entry and exit. Given
29974 that BB has an edge to exit, determine if a predecessor
29975 of BB has an edge from entry. If so, compute the number
29976 of instructions in the predecessor block. If there
29977 happen to be multiple such blocks, compute the minimum. */
29978 min_prev_count = 4;
29979 FOR_EACH_EDGE (e, ei, bb->preds)
29980 {
29981 edge prev_e;
29982 edge_iterator prev_ei;
29983
29984 if (e->src == ENTRY_BLOCK_PTR)
29985 {
29986 min_prev_count = 0;
29987 break;
29988 }
29989 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
29990 {
29991 if (prev_e->src == ENTRY_BLOCK_PTR)
29992 {
29993 int count = ix86_count_insn_bb (e->src);
29994 if (count < min_prev_count)
29995 min_prev_count = count;
29996 break;
29997 }
29998 }
29999 }
30000
30001 if (min_prev_count < 4)
30002 min_prev_count += ix86_count_insn_bb (bb);
30003
30004 return min_prev_count;
30005 }
30006
30007 /* Pad short functions to 4 instructions. */
30008
30009 static void
30010 ix86_pad_short_function (void)
30011 {
30012 edge e;
30013 edge_iterator ei;
30014
30015 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30016 {
30017 rtx ret = BB_END (e->src);
30018 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30019 {
30020 int insn_count = ix86_count_insn (e->src);
30021
30022 /* Pad short function. */
30023 if (insn_count < 4)
30024 {
30025 rtx insn = ret;
30026
30027 /* Find epilogue. */
30028 while (insn
30029 && (!NOTE_P (insn)
30030 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30031 insn = PREV_INSN (insn);
30032
30033 if (!insn)
30034 insn = ret;
30035
30036 /* Two NOPs count as one instruction. */
30037 insn_count = 2 * (4 - insn_count);
30038 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
30039 }
30040 }
30041 }
30042 }
30043
30044 /* Implement machine specific optimizations. We implement padding of returns
30045 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
30046 static void
30047 ix86_reorg (void)
30048 {
30049 /* We are freeing block_for_insn in the toplev to keep compatibility
30050 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30051 compute_bb_for_insn ();
30052
30053 /* Run the vzeroupper optimization if needed. */
30054 if (TARGET_VZEROUPPER)
30055 move_or_delete_vzeroupper ();
30056
30057 if (optimize && optimize_function_for_speed_p (cfun))
30058 {
30059 if (TARGET_PAD_SHORT_FUNCTION)
30060 ix86_pad_short_function ();
30061 else if (TARGET_PAD_RETURNS)
30062 ix86_pad_returns ();
30063 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30064 if (TARGET_FOUR_JUMP_LIMIT)
30065 ix86_avoid_jump_mispredicts ();
30066 #endif
30067 }
30068 }
30069
30070 /* Return nonzero when a QImode register that must be represented via a REX
30071 prefix is used. */
30072 bool
30073 x86_extended_QIreg_mentioned_p (rtx insn)
30074 {
30075 int i;
30076 extract_insn_cached (insn);
30077 for (i = 0; i < recog_data.n_operands; i++)
30078 if (REG_P (recog_data.operand[i])
30079 && REGNO (recog_data.operand[i]) > BX_REG)
30080 return true;
30081 return false;
30082 }
30083
30084 /* Return nonzero when P points to a register encoded via a REX prefix.
30085 Called via for_each_rtx. */
30086 static int
30087 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30088 {
30089 unsigned int regno;
30090 if (!REG_P (*p))
30091 return 0;
30092 regno = REGNO (*p);
30093 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30094 }
30095
30096 /* Return true when INSN mentions a register that must be encoded using a REX
30097 prefix. */
30098 bool
30099 x86_extended_reg_mentioned_p (rtx insn)
30100 {
30101 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30102 extended_reg_mentioned_1, NULL);
30103 }
30104
30105 /* If profitable, negate (without causing overflow) integer constant
30106 of mode MODE at location LOC. Return true in this case. */
30107 bool
30108 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30109 {
30110 HOST_WIDE_INT val;
30111
30112 if (!CONST_INT_P (*loc))
30113 return false;
30114
30115 switch (mode)
30116 {
30117 case DImode:
30118 /* DImode x86_64 constants must fit in 32 bits. */
30119 gcc_assert (x86_64_immediate_operand (*loc, mode));
30120
30121 mode = SImode;
30122 break;
30123
30124 case SImode:
30125 case HImode:
30126 case QImode:
30127 break;
30128
30129 default:
30130 gcc_unreachable ();
30131 }
30132
30133 /* Avoid overflows. */
30134 if (mode_signbit_p (mode, *loc))
30135 return false;
30136
30137 val = INTVAL (*loc);
30138
30139 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
30140 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
30141 if ((val < 0 && val != -128)
30142 || val == 128)
30143 {
30144 *loc = GEN_INT (-val);
30145 return true;
30146 }
30147
30148 return false;
30149 }
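/* For example, (const_int -4) in SImode is rewritten to (const_int 4) so the
   caller can emit "subl $4, %eax" instead of "addl $-4, %eax"; 128 is
   rewritten to -128 for the opposite reason, since -128 fits in a
   sign-extended 8-bit immediate while 128 does not.  Restates the comment in
   the function above with concrete values.  */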
30150
30151 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30152 optabs would emit if we didn't have TFmode patterns. */
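/* Sketch of the sequence emitted below (a reading of the code, not a change
   to it):

     if ((signed) in >= 0)
       out = (fp) in;                  -- plain signed conversion
     else
       {
         tmp = (in >> 1) | (in & 1);   -- halve, keeping the low bit so
                                          rounding is unaffected
         out = (fp) tmp;
         out = out + out;              -- scale back up
       }
*/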
30153
30154 void
30155 x86_emit_floatuns (rtx operands[2])
30156 {
30157 rtx neglab, donelab, i0, i1, f0, in, out;
30158 enum machine_mode mode, inmode;
30159
30160 inmode = GET_MODE (operands[1]);
30161 gcc_assert (inmode == SImode || inmode == DImode);
30162
30163 out = operands[0];
30164 in = force_reg (inmode, operands[1]);
30165 mode = GET_MODE (out);
30166 neglab = gen_label_rtx ();
30167 donelab = gen_label_rtx ();
30168 f0 = gen_reg_rtx (mode);
30169
30170 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30171
30172 expand_float (out, in, 0);
30173
30174 emit_jump_insn (gen_jump (donelab));
30175 emit_barrier ();
30176
30177 emit_label (neglab);
30178
30179 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30180 1, OPTAB_DIRECT);
30181 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30182 1, OPTAB_DIRECT);
30183 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30184
30185 expand_float (f0, i0, 0);
30186
30187 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30188
30189 emit_label (donelab);
30190 }
30191 \f
30192 /* AVX does not support 32-byte integer vector operations,
30193 thus the longest vector we are faced with is V16QImode. */
30194 #define MAX_VECT_LEN 16
30195
30196 struct expand_vec_perm_d
30197 {
30198 rtx target, op0, op1;
30199 unsigned char perm[MAX_VECT_LEN];
30200 enum machine_mode vmode;
30201 unsigned char nelt;
30202 bool testing_p;
30203 };
30204
30205 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30206 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30207
30208 /* Get a vector mode of the same size as the original but with elements
30209 twice as wide. This is only guaranteed to apply to integral vectors. */
30210
30211 static inline enum machine_mode
30212 get_mode_wider_vector (enum machine_mode o)
30213 {
30214 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30215 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30216 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30217 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30218 return n;
30219 }
30220
30221 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30222 with all elements equal to VAR. Return true if successful. */
30223
30224 static bool
30225 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30226 rtx target, rtx val)
30227 {
30228 bool ok;
30229
30230 switch (mode)
30231 {
30232 case V2SImode:
30233 case V2SFmode:
30234 if (!mmx_ok)
30235 return false;
30236 /* FALLTHRU */
30237
30238 case V4DFmode:
30239 case V4DImode:
30240 case V8SFmode:
30241 case V8SImode:
30242 case V2DFmode:
30243 case V2DImode:
30244 case V4SFmode:
30245 case V4SImode:
30246 {
30247 rtx insn, dup;
30248
30249 /* First attempt to recognize VAL as-is. */
30250 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30251 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30252 if (recog_memoized (insn) < 0)
30253 {
30254 rtx seq;
30255 /* If that fails, force VAL into a register. */
30256
30257 start_sequence ();
30258 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30259 seq = get_insns ();
30260 end_sequence ();
30261 if (seq)
30262 emit_insn_before (seq, insn);
30263
30264 ok = recog_memoized (insn) >= 0;
30265 gcc_assert (ok);
30266 }
30267 }
30268 return true;
30269
30270 case V4HImode:
30271 if (!mmx_ok)
30272 return false;
30273 if (TARGET_SSE || TARGET_3DNOW_A)
30274 {
30275 rtx x;
30276
30277 val = gen_lowpart (SImode, val);
30278 x = gen_rtx_TRUNCATE (HImode, val);
30279 x = gen_rtx_VEC_DUPLICATE (mode, x);
30280 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30281 return true;
30282 }
30283 goto widen;
30284
30285 case V8QImode:
30286 if (!mmx_ok)
30287 return false;
30288 goto widen;
30289
30290 case V8HImode:
30291 if (TARGET_SSE2)
30292 {
30293 struct expand_vec_perm_d dperm;
30294 rtx tmp1, tmp2;
30295
30296 permute:
30297 memset (&dperm, 0, sizeof (dperm));
30298 dperm.target = target;
30299 dperm.vmode = mode;
30300 dperm.nelt = GET_MODE_NUNITS (mode);
30301 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30302
30303 /* Extend to SImode using a paradoxical SUBREG. */
30304 tmp1 = gen_reg_rtx (SImode);
30305 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30306
30307 /* Insert the SImode value as low element of a V4SImode vector. */
30308 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30309 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30310
30311 ok = (expand_vec_perm_1 (&dperm)
30312 || expand_vec_perm_broadcast_1 (&dperm));
30313 gcc_assert (ok);
30314 return ok;
30315 }
30316 goto widen;
30317
30318 case V16QImode:
30319 if (TARGET_SSE2)
30320 goto permute;
30321 goto widen;
30322
30323 widen:
30324 /* Replicate the value once into the next wider mode and recurse. */
30325 {
30326 enum machine_mode smode, wsmode, wvmode;
30327 rtx x;
30328
30329 smode = GET_MODE_INNER (mode);
30330 wvmode = get_mode_wider_vector (mode);
30331 wsmode = GET_MODE_INNER (wvmode);
30332
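/* For instance, a QImode value 0xab zero-extended to HImode becomes
   (0xab << 8) | 0xab = 0xabab, so each element of the wider vector holds
   two copies of the original value.  */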
30333 val = convert_modes (wsmode, smode, val, true);
30334 x = expand_simple_binop (wsmode, ASHIFT, val,
30335 GEN_INT (GET_MODE_BITSIZE (smode)),
30336 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30337 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30338
30339 x = gen_lowpart (wvmode, target);
30340 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30341 gcc_assert (ok);
30342 return ok;
30343 }
30344
30345 case V16HImode:
30346 case V32QImode:
30347 {
30348 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30349 rtx x = gen_reg_rtx (hvmode);
30350
30351 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30352 gcc_assert (ok);
30353
30354 x = gen_rtx_VEC_CONCAT (mode, x, x);
30355 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30356 }
30357 return true;
30358
30359 default:
30360 return false;
30361 }
30362 }
30363
30364 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30365 whose ONE_VAR element is VAR, and other elements are zero. Return true
30366 if successful. */
30367
30368 static bool
30369 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30370 rtx target, rtx var, int one_var)
30371 {
30372 enum machine_mode vsimode;
30373 rtx new_target;
30374 rtx x, tmp;
30375 bool use_vector_set = false;
30376
30377 switch (mode)
30378 {
30379 case V2DImode:
30380 /* For SSE4.1, we normally use vector set. But if the second
30381 element is zero and inter-unit moves are OK, we use movq
30382 instead. */
30383 use_vector_set = (TARGET_64BIT
30384 && TARGET_SSE4_1
30385 && !(TARGET_INTER_UNIT_MOVES
30386 && one_var == 0));
30387 break;
30388 case V16QImode:
30389 case V4SImode:
30390 case V4SFmode:
30391 use_vector_set = TARGET_SSE4_1;
30392 break;
30393 case V8HImode:
30394 use_vector_set = TARGET_SSE2;
30395 break;
30396 case V4HImode:
30397 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30398 break;
30399 case V32QImode:
30400 case V16HImode:
30401 case V8SImode:
30402 case V8SFmode:
30403 case V4DFmode:
30404 use_vector_set = TARGET_AVX;
30405 break;
30406 case V4DImode:
30407 /* Use ix86_expand_vector_set in 64bit mode only. */
30408 use_vector_set = TARGET_AVX && TARGET_64BIT;
30409 break;
30410 default:
30411 break;
30412 }
30413
30414 if (use_vector_set)
30415 {
30416 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30417 var = force_reg (GET_MODE_INNER (mode), var);
30418 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30419 return true;
30420 }
30421
30422 switch (mode)
30423 {
30424 case V2SFmode:
30425 case V2SImode:
30426 if (!mmx_ok)
30427 return false;
30428 /* FALLTHRU */
30429
30430 case V2DFmode:
30431 case V2DImode:
30432 if (one_var != 0)
30433 return false;
30434 var = force_reg (GET_MODE_INNER (mode), var);
30435 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30436 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30437 return true;
30438
30439 case V4SFmode:
30440 case V4SImode:
30441 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30442 new_target = gen_reg_rtx (mode);
30443 else
30444 new_target = target;
30445 var = force_reg (GET_MODE_INNER (mode), var);
30446 x = gen_rtx_VEC_DUPLICATE (mode, var);
30447 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30448 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30449 if (one_var != 0)
30450 {
30451 /* We need to shuffle the value to the correct position, so
30452 create a new pseudo to store the intermediate result. */
30453
30454 /* With SSE2, we can use the integer shuffle insns. */
30455 if (mode != V4SFmode && TARGET_SSE2)
30456 {
30457 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30458 const1_rtx,
30459 GEN_INT (one_var == 1 ? 0 : 1),
30460 GEN_INT (one_var == 2 ? 0 : 1),
30461 GEN_INT (one_var == 3 ? 0 : 1)));
30462 if (target != new_target)
30463 emit_move_insn (target, new_target);
30464 return true;
30465 }
30466
30467 /* Otherwise convert the intermediate result to V4SFmode and
30468 use the SSE1 shuffle instructions. */
30469 if (mode != V4SFmode)
30470 {
30471 tmp = gen_reg_rtx (V4SFmode);
30472 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30473 }
30474 else
30475 tmp = new_target;
30476
30477 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30478 const1_rtx,
30479 GEN_INT (one_var == 1 ? 0 : 1),
30480 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30481 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30482
30483 if (mode != V4SFmode)
30484 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30485 else if (tmp != target)
30486 emit_move_insn (target, tmp);
30487 }
30488 else if (target != new_target)
30489 emit_move_insn (target, new_target);
30490 return true;
30491
30492 case V8HImode:
30493 case V16QImode:
30494 vsimode = V4SImode;
30495 goto widen;
30496 case V4HImode:
30497 case V8QImode:
30498 if (!mmx_ok)
30499 return false;
30500 vsimode = V2SImode;
30501 goto widen;
30502 widen:
30503 if (one_var != 0)
30504 return false;
30505
30506 /* Zero extend the variable element to SImode and recurse. */
30507 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30508
30509 x = gen_reg_rtx (vsimode);
30510 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30511 var, one_var))
30512 gcc_unreachable ();
30513
30514 emit_move_insn (target, gen_lowpart (mode, x));
30515 return true;
30516
30517 default:
30518 return false;
30519 }
30520 }
30521
30522 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30523 consisting of the values in VALS. It is known that all elements
30524 except ONE_VAR are constants. Return true if successful. */
30525
30526 static bool
30527 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30528 rtx target, rtx vals, int one_var)
30529 {
30530 rtx var = XVECEXP (vals, 0, one_var);
30531 enum machine_mode wmode;
30532 rtx const_vec, x;
30533
30534 const_vec = copy_rtx (vals);
30535 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30536 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30537
30538 switch (mode)
30539 {
30540 case V2DFmode:
30541 case V2DImode:
30542 case V2SFmode:
30543 case V2SImode:
30544 /* For the two element vectors, it's just as easy to use
30545 the general case. */
30546 return false;
30547
30548 case V4DImode:
30549 /* Use ix86_expand_vector_set in 64bit mode only. */
30550 if (!TARGET_64BIT)
30551 return false;
30552 case V4DFmode:
30553 case V8SFmode:
30554 case V8SImode:
30555 case V16HImode:
30556 case V32QImode:
30557 case V4SFmode:
30558 case V4SImode:
30559 case V8HImode:
30560 case V4HImode:
30561 break;
30562
30563 case V16QImode:
30564 if (TARGET_SSE4_1)
30565 break;
30566 wmode = V8HImode;
30567 goto widen;
30568 case V8QImode:
30569 wmode = V4HImode;
30570 goto widen;
30571 widen:
30572 /* There's no way to set one QImode entry easily. Combine
30573 the variable value with its adjacent constant value, and
30574 promote to an HImode set. */
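/* E.g. a variable byte at an odd index is shifted into the high half of
   its HImode pair and the neighbouring constant byte fills the low half
   (little endian); the pair is then written with a single HImode
   vector set at index one_var >> 1.  */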
30575 x = XVECEXP (vals, 0, one_var ^ 1);
30576 if (one_var & 1)
30577 {
30578 var = convert_modes (HImode, QImode, var, true);
30579 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30580 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30581 x = GEN_INT (INTVAL (x) & 0xff);
30582 }
30583 else
30584 {
30585 var = convert_modes (HImode, QImode, var, true);
30586 x = gen_int_mode (INTVAL (x) << 8, HImode);
30587 }
30588 if (x != const0_rtx)
30589 var = expand_simple_binop (HImode, IOR, var, x, var,
30590 1, OPTAB_LIB_WIDEN);
30591
30592 x = gen_reg_rtx (wmode);
30593 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30594 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30595
30596 emit_move_insn (target, gen_lowpart (mode, x));
30597 return true;
30598
30599 default:
30600 return false;
30601 }
30602
30603 emit_move_insn (target, const_vec);
30604 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30605 return true;
30606 }
30607
30608 /* A subroutine of ix86_expand_vector_init_general. Use vector
30609 concatenate to handle the most general case: all values variable,
30610 and none identical. */
30611
30612 static void
30613 ix86_expand_vector_init_concat (enum machine_mode mode,
30614 rtx target, rtx *ops, int n)
30615 {
30616 enum machine_mode cmode, hmode = VOIDmode;
30617 rtx first[8], second[4];
30618 rtvec v;
30619 int i, j;
30620
30621 switch (n)
30622 {
30623 case 2:
30624 switch (mode)
30625 {
30626 case V8SImode:
30627 cmode = V4SImode;
30628 break;
30629 case V8SFmode:
30630 cmode = V4SFmode;
30631 break;
30632 case V4DImode:
30633 cmode = V2DImode;
30634 break;
30635 case V4DFmode:
30636 cmode = V2DFmode;
30637 break;
30638 case V4SImode:
30639 cmode = V2SImode;
30640 break;
30641 case V4SFmode:
30642 cmode = V2SFmode;
30643 break;
30644 case V2DImode:
30645 cmode = DImode;
30646 break;
30647 case V2SImode:
30648 cmode = SImode;
30649 break;
30650 case V2DFmode:
30651 cmode = DFmode;
30652 break;
30653 case V2SFmode:
30654 cmode = SFmode;
30655 break;
30656 default:
30657 gcc_unreachable ();
30658 }
30659
30660 if (!register_operand (ops[1], cmode))
30661 ops[1] = force_reg (cmode, ops[1]);
30662 if (!register_operand (ops[0], cmode))
30663 ops[0] = force_reg (cmode, ops[0]);
30664 emit_insn (gen_rtx_SET (VOIDmode, target,
30665 gen_rtx_VEC_CONCAT (mode, ops[0],
30666 ops[1])));
30667 break;
30668
30669 case 4:
30670 switch (mode)
30671 {
30672 case V4DImode:
30673 cmode = V2DImode;
30674 break;
30675 case V4DFmode:
30676 cmode = V2DFmode;
30677 break;
30678 case V4SImode:
30679 cmode = V2SImode;
30680 break;
30681 case V4SFmode:
30682 cmode = V2SFmode;
30683 break;
30684 default:
30685 gcc_unreachable ();
30686 }
30687 goto half;
30688
30689 case 8:
30690 switch (mode)
30691 {
30692 case V8SImode:
30693 cmode = V2SImode;
30694 hmode = V4SImode;
30695 break;
30696 case V8SFmode:
30697 cmode = V2SFmode;
30698 hmode = V4SFmode;
30699 break;
30700 default:
30701 gcc_unreachable ();
30702 }
30703 goto half;
30704
30705 half:
30706 /* FIXME: We process inputs backward to help RA. PR 36222. */
30707 i = n - 1;
30708 j = (n >> 1) - 1;
30709 for (; i > 0; i -= 2, j--)
30710 {
30711 first[j] = gen_reg_rtx (cmode);
30712 v = gen_rtvec (2, ops[i - 1], ops[i]);
30713 ix86_expand_vector_init (false, first[j],
30714 gen_rtx_PARALLEL (cmode, v));
30715 }
30716
30717 n >>= 1;
30718 if (n > 2)
30719 {
30720 gcc_assert (hmode != VOIDmode);
30721 for (i = j = 0; i < n; i += 2, j++)
30722 {
30723 second[j] = gen_reg_rtx (hmode);
30724 ix86_expand_vector_init_concat (hmode, second [j],
30725 &first [i], 2);
30726 }
30727 n >>= 1;
30728 ix86_expand_vector_init_concat (mode, target, second, n);
30729 }
30730 else
30731 ix86_expand_vector_init_concat (mode, target, first, n);
30732 break;
30733
30734 default:
30735 gcc_unreachable ();
30736 }
30737 }
30738
30739 /* A subroutine of ix86_expand_vector_init_general. Use vector
30740 interleave to handle the most general case: all values variable,
30741 and none identical. */
30742
30743 static void
30744 ix86_expand_vector_init_interleave (enum machine_mode mode,
30745 rtx target, rtx *ops, int n)
30746 {
30747 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
30748 int i, j;
30749 rtx op0, op1;
30750 rtx (*gen_load_even) (rtx, rtx, rtx);
30751 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
30752 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
30753
30754 switch (mode)
30755 {
30756 case V8HImode:
30757 gen_load_even = gen_vec_setv8hi;
30758 gen_interleave_first_low = gen_vec_interleave_lowv4si;
30759 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30760 inner_mode = HImode;
30761 first_imode = V4SImode;
30762 second_imode = V2DImode;
30763 third_imode = VOIDmode;
30764 break;
30765 case V16QImode:
30766 gen_load_even = gen_vec_setv16qi;
30767 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
30768 gen_interleave_second_low = gen_vec_interleave_lowv4si;
30769 inner_mode = QImode;
30770 first_imode = V8HImode;
30771 second_imode = V4SImode;
30772 third_imode = V2DImode;
30773 break;
30774 default:
30775 gcc_unreachable ();
30776 }
30777
30778 for (i = 0; i < n; i++)
30779 {
30780 /* Extend the odd element to SImode using a paradoxical SUBREG. */
30781 op0 = gen_reg_rtx (SImode);
30782 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
30783
30784 /* Insert the SImode value as low element of V4SImode vector. */
30785 op1 = gen_reg_rtx (V4SImode);
30786 op0 = gen_rtx_VEC_MERGE (V4SImode,
30787 gen_rtx_VEC_DUPLICATE (V4SImode,
30788 op0),
30789 CONST0_RTX (V4SImode),
30790 const1_rtx);
30791 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
30792
30793 /* Cast the V4SImode vector back to a vector in the original mode. */
30794 op0 = gen_reg_rtx (mode);
30795 emit_move_insn (op0, gen_lowpart (mode, op1));
30796
30797 /* Load even elements into the second position. */
30798 emit_insn (gen_load_even (op0,
30799 force_reg (inner_mode,
30800 ops [i + i + 1]),
30801 const1_rtx));
30802
30803 /* Cast vector to FIRST_IMODE vector. */
30804 ops[i] = gen_reg_rtx (first_imode);
30805 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
30806 }
30807
30808 /* Interleave low FIRST_IMODE vectors. */
30809 for (i = j = 0; i < n; i += 2, j++)
30810 {
30811 op0 = gen_reg_rtx (first_imode);
30812 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
30813
30814 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30815 ops[j] = gen_reg_rtx (second_imode);
30816 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
30817 }
30818
30819 /* Interleave low SECOND_IMODE vectors. */
30820 switch (second_imode)
30821 {
30822 case V4SImode:
30823 for (i = j = 0; i < n / 2; i += 2, j++)
30824 {
30825 op0 = gen_reg_rtx (second_imode);
30826 emit_insn (gen_interleave_second_low (op0, ops[i],
30827 ops[i + 1]));
30828
30829 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
30830 vector. */
30831 ops[j] = gen_reg_rtx (third_imode);
30832 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
30833 }
30834 second_imode = V2DImode;
30835 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30836 /* FALLTHRU */
30837
30838 case V2DImode:
30839 op0 = gen_reg_rtx (second_imode);
30840 emit_insn (gen_interleave_second_low (op0, ops[0],
30841 ops[1]));
30842
30843 /* Cast the SECOND_IMODE vector back to a vector in the original
30844 mode. */
30845 emit_insn (gen_rtx_SET (VOIDmode, target,
30846 gen_lowpart (mode, op0)));
30847 break;
30848
30849 default:
30850 gcc_unreachable ();
30851 }
30852 }
30853
30854 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30855 all values variable, and none identical. */
30856
30857 static void
30858 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
30859 rtx target, rtx vals)
30860 {
30861 rtx ops[32], op0, op1;
30862 enum machine_mode half_mode = VOIDmode;
30863 int n, i;
30864
30865 switch (mode)
30866 {
30867 case V2SFmode:
30868 case V2SImode:
30869 if (!mmx_ok && !TARGET_SSE)
30870 break;
30871 /* FALLTHRU */
30872
30873 case V8SFmode:
30874 case V8SImode:
30875 case V4DFmode:
30876 case V4DImode:
30877 case V4SFmode:
30878 case V4SImode:
30879 case V2DFmode:
30880 case V2DImode:
30881 n = GET_MODE_NUNITS (mode);
30882 for (i = 0; i < n; i++)
30883 ops[i] = XVECEXP (vals, 0, i);
30884 ix86_expand_vector_init_concat (mode, target, ops, n);
30885 return;
30886
30887 case V32QImode:
30888 half_mode = V16QImode;
30889 goto half;
30890
30891 case V16HImode:
30892 half_mode = V8HImode;
30893 goto half;
30894
30895 half:
30896 n = GET_MODE_NUNITS (mode);
30897 for (i = 0; i < n; i++)
30898 ops[i] = XVECEXP (vals, 0, i);
30899 op0 = gen_reg_rtx (half_mode);
30900 op1 = gen_reg_rtx (half_mode);
30901 ix86_expand_vector_init_interleave (half_mode, op0, ops,
30902 n >> 2);
30903 ix86_expand_vector_init_interleave (half_mode, op1,
30904 &ops [n >> 1], n >> 2);
30905 emit_insn (gen_rtx_SET (VOIDmode, target,
30906 gen_rtx_VEC_CONCAT (mode, op0, op1)));
30907 return;
30908
30909 case V16QImode:
30910 if (!TARGET_SSE4_1)
30911 break;
30912 /* FALLTHRU */
30913
30914 case V8HImode:
30915 if (!TARGET_SSE2)
30916 break;
30917
30918 /* Don't use ix86_expand_vector_init_interleave if we can't
30919 move from GPR to SSE register directly. */
30920 if (!TARGET_INTER_UNIT_MOVES)
30921 break;
30922
30923 n = GET_MODE_NUNITS (mode);
30924 for (i = 0; i < n; i++)
30925 ops[i] = XVECEXP (vals, 0, i);
30926 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
30927 return;
30928
30929 case V4HImode:
30930 case V8QImode:
30931 break;
30932
30933 default:
30934 gcc_unreachable ();
30935 }
30936
30937 {
30938 int i, j, n_elts, n_words, n_elt_per_word;
30939 enum machine_mode inner_mode;
30940 rtx words[4], shift;
30941
30942 inner_mode = GET_MODE_INNER (mode);
30943 n_elts = GET_MODE_NUNITS (mode);
30944 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
30945 n_elt_per_word = n_elts / n_words;
30946 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
30947
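/* Fallback: pack the elements of each word-sized chunk with shift-and-IOR,
   starting from the highest-numbered element so that the element order
   matches the little-endian layout of the vector in memory, then assemble
   the vector from the resulting words.  */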
30948 for (i = 0; i < n_words; ++i)
30949 {
30950 rtx word = NULL_RTX;
30951
30952 for (j = 0; j < n_elt_per_word; ++j)
30953 {
30954 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
30955 elt = convert_modes (word_mode, inner_mode, elt, true);
30956
30957 if (j == 0)
30958 word = elt;
30959 else
30960 {
30961 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
30962 word, 1, OPTAB_LIB_WIDEN);
30963 word = expand_simple_binop (word_mode, IOR, word, elt,
30964 word, 1, OPTAB_LIB_WIDEN);
30965 }
30966 }
30967
30968 words[i] = word;
30969 }
30970
30971 if (n_words == 1)
30972 emit_move_insn (target, gen_lowpart (mode, words[0]));
30973 else if (n_words == 2)
30974 {
30975 rtx tmp = gen_reg_rtx (mode);
30976 emit_clobber (tmp);
30977 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
30978 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
30979 emit_move_insn (target, tmp);
30980 }
30981 else if (n_words == 4)
30982 {
30983 rtx tmp = gen_reg_rtx (V4SImode);
30984 gcc_assert (word_mode == SImode);
30985 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
30986 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
30987 emit_move_insn (target, gen_lowpart (mode, tmp));
30988 }
30989 else
30990 gcc_unreachable ();
30991 }
30992 }
30993
30994 /* Initialize vector TARGET via VALS. Suppress the use of MMX
30995 instructions unless MMX_OK is true. */
30996
30997 void
30998 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
30999 {
31000 enum machine_mode mode = GET_MODE (target);
31001 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31002 int n_elts = GET_MODE_NUNITS (mode);
31003 int n_var = 0, one_var = -1;
31004 bool all_same = true, all_const_zero = true;
31005 int i;
31006 rtx x;
31007
31008 for (i = 0; i < n_elts; ++i)
31009 {
31010 x = XVECEXP (vals, 0, i);
31011 if (!(CONST_INT_P (x)
31012 || GET_CODE (x) == CONST_DOUBLE
31013 || GET_CODE (x) == CONST_FIXED))
31014 n_var++, one_var = i;
31015 else if (x != CONST0_RTX (inner_mode))
31016 all_const_zero = false;
31017 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31018 all_same = false;
31019 }
31020
31021 /* Constants are best loaded from the constant pool. */
31022 if (n_var == 0)
31023 {
31024 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31025 return;
31026 }
31027
31028 /* If all values are identical, broadcast the value. */
31029 if (all_same
31030 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31031 XVECEXP (vals, 0, 0)))
31032 return;
31033
31034 /* Values where only one field is non-constant are best loaded from
31035 the pool and overwritten via move later. */
31036 if (n_var == 1)
31037 {
31038 if (all_const_zero
31039 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31040 XVECEXP (vals, 0, one_var),
31041 one_var))
31042 return;
31043
31044 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31045 return;
31046 }
31047
31048 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31049 }
31050
31051 void
31052 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31053 {
31054 enum machine_mode mode = GET_MODE (target);
31055 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31056 enum machine_mode half_mode;
31057 bool use_vec_merge = false;
31058 rtx tmp;
31059 static rtx (*gen_extract[6][2]) (rtx, rtx)
31060 = {
31061 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31062 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31063 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31064 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31065 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31066 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31067 };
31068 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31069 = {
31070 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31071 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31072 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31073 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31074 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31075 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31076 };
31077 int i, j, n;
31078
31079 switch (mode)
31080 {
31081 case V2SFmode:
31082 case V2SImode:
31083 if (mmx_ok)
31084 {
31085 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31086 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31087 if (elt == 0)
31088 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31089 else
31090 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31091 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31092 return;
31093 }
31094 break;
31095
31096 case V2DImode:
31097 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
31098 if (use_vec_merge)
31099 break;
31100
31101 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31102 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
31103 if (elt == 0)
31104 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31105 else
31106 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31107 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31108 return;
31109
31110 case V2DFmode:
31111 {
31112 rtx op0, op1;
31113
31114 /* For the two element vectors, we implement a VEC_CONCAT with
31115 the extraction of the other element. */
31116
31117 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31118 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31119
31120 if (elt == 0)
31121 op0 = val, op1 = tmp;
31122 else
31123 op0 = tmp, op1 = val;
31124
31125 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31126 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31127 }
31128 return;
31129
31130 case V4SFmode:
31131 use_vec_merge = TARGET_SSE4_1;
31132 if (use_vec_merge)
31133 break;
31134
31135 switch (elt)
31136 {
31137 case 0:
31138 use_vec_merge = true;
31139 break;
31140
31141 case 1:
31142 /* tmp = target = A B C D */
31143 tmp = copy_to_reg (target);
31144 /* target = A A B B */
31145 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31146 /* target = X A B B */
31147 ix86_expand_vector_set (false, target, val, 0);
31148 /* target = A X C D */
31149 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31150 const1_rtx, const0_rtx,
31151 GEN_INT (2+4), GEN_INT (3+4)));
31152 return;
31153
31154 case 2:
31155 /* tmp = target = A B C D */
31156 tmp = copy_to_reg (target);
31157 /* tmp = X B C D */
31158 ix86_expand_vector_set (false, tmp, val, 0);
31159 /* target = A B X D */
31160 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31161 const0_rtx, const1_rtx,
31162 GEN_INT (0+4), GEN_INT (3+4)));
31163 return;
31164
31165 case 3:
31166 /* tmp = target = A B C D */
31167 tmp = copy_to_reg (target);
31168 /* tmp = X B C D */
31169 ix86_expand_vector_set (false, tmp, val, 0);
31170 /* target = A B C X */
31171 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31172 const0_rtx, const1_rtx,
31173 GEN_INT (2+4), GEN_INT (0+4)));
31174 return;
31175
31176 default:
31177 gcc_unreachable ();
31178 }
31179 break;
31180
31181 case V4SImode:
31182 use_vec_merge = TARGET_SSE4_1;
31183 if (use_vec_merge)
31184 break;
31185
31186 /* Element 0 handled by vec_merge below. */
31187 if (elt == 0)
31188 {
31189 use_vec_merge = true;
31190 break;
31191 }
31192
31193 if (TARGET_SSE2)
31194 {
31195 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31196 store into element 0, then shuffle them back. */
31197
31198 rtx order[4];
31199
31200 order[0] = GEN_INT (elt);
31201 order[1] = const1_rtx;
31202 order[2] = const2_rtx;
31203 order[3] = GEN_INT (3);
31204 order[elt] = const0_rtx;
31205
31206 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31207 order[1], order[2], order[3]));
31208
31209 ix86_expand_vector_set (false, target, val, 0);
31210
31211 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31212 order[1], order[2], order[3]));
31213 }
31214 else
31215 {
31216 /* For SSE1, we have to reuse the V4SF code. */
31217 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31218 gen_lowpart (SFmode, val), elt);
31219 }
31220 return;
31221
31222 case V8HImode:
31223 use_vec_merge = TARGET_SSE2;
31224 break;
31225 case V4HImode:
31226 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31227 break;
31228
31229 case V16QImode:
31230 use_vec_merge = TARGET_SSE4_1;
31231 break;
31232
31233 case V8QImode:
31234 break;
31235
31236 case V32QImode:
31237 half_mode = V16QImode;
31238 j = 0;
31239 n = 16;
31240 goto half;
31241
31242 case V16HImode:
31243 half_mode = V8HImode;
31244 j = 1;
31245 n = 8;
31246 goto half;
31247
31248 case V8SImode:
31249 half_mode = V4SImode;
31250 j = 2;
31251 n = 4;
31252 goto half;
31253
31254 case V4DImode:
31255 half_mode = V2DImode;
31256 j = 3;
31257 n = 2;
31258 goto half;
31259
31260 case V8SFmode:
31261 half_mode = V4SFmode;
31262 j = 4;
31263 n = 4;
31264 goto half;
31265
31266 case V4DFmode:
31267 half_mode = V2DFmode;
31268 j = 5;
31269 n = 2;
31270 goto half;
31271
31272 half:
31273 /* Compute offset. */
31274 i = elt / n;
31275 elt %= n;
31276
31277 gcc_assert (i <= 1);
31278
31279 /* Extract the half. */
31280 tmp = gen_reg_rtx (half_mode);
31281 emit_insn (gen_extract[j][i] (tmp, target));
31282
31283 /* Put val in tmp at elt. */
31284 ix86_expand_vector_set (false, tmp, val, elt);
31285
31286 /* Put it back. */
31287 emit_insn (gen_insert[j][i] (target, target, tmp));
31288 return;
31289
31290 default:
31291 break;
31292 }
31293
31294 if (use_vec_merge)
31295 {
31296 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31297 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31298 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31299 }
31300 else
31301 {
31302 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31303
31304 emit_move_insn (mem, target);
31305
31306 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31307 emit_move_insn (tmp, val);
31308
31309 emit_move_insn (target, mem);
31310 }
31311 }
31312
31313 void
31314 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31315 {
31316 enum machine_mode mode = GET_MODE (vec);
31317 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31318 bool use_vec_extr = false;
31319 rtx tmp;
31320
31321 switch (mode)
31322 {
31323 case V2SImode:
31324 case V2SFmode:
31325 if (!mmx_ok)
31326 break;
31327 /* FALLTHRU */
31328
31329 case V2DFmode:
31330 case V2DImode:
31331 use_vec_extr = true;
31332 break;
31333
31334 case V4SFmode:
31335 use_vec_extr = TARGET_SSE4_1;
31336 if (use_vec_extr)
31337 break;
31338
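/* Without SSE4.1, shuffle the requested element into element 0 of a
   temporary and extract from there.  */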
31339 switch (elt)
31340 {
31341 case 0:
31342 tmp = vec;
31343 break;
31344
31345 case 1:
31346 case 3:
31347 tmp = gen_reg_rtx (mode);
31348 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31349 GEN_INT (elt), GEN_INT (elt),
31350 GEN_INT (elt+4), GEN_INT (elt+4)));
31351 break;
31352
31353 case 2:
31354 tmp = gen_reg_rtx (mode);
31355 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31356 break;
31357
31358 default:
31359 gcc_unreachable ();
31360 }
31361 vec = tmp;
31362 use_vec_extr = true;
31363 elt = 0;
31364 break;
31365
31366 case V4SImode:
31367 use_vec_extr = TARGET_SSE4_1;
31368 if (use_vec_extr)
31369 break;
31370
31371 if (TARGET_SSE2)
31372 {
31373 switch (elt)
31374 {
31375 case 0:
31376 tmp = vec;
31377 break;
31378
31379 case 1:
31380 case 3:
31381 tmp = gen_reg_rtx (mode);
31382 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31383 GEN_INT (elt), GEN_INT (elt),
31384 GEN_INT (elt), GEN_INT (elt)));
31385 break;
31386
31387 case 2:
31388 tmp = gen_reg_rtx (mode);
31389 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31390 break;
31391
31392 default:
31393 gcc_unreachable ();
31394 }
31395 vec = tmp;
31396 use_vec_extr = true;
31397 elt = 0;
31398 }
31399 else
31400 {
31401 /* For SSE1, we have to reuse the V4SF code. */
31402 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31403 gen_lowpart (V4SFmode, vec), elt);
31404 return;
31405 }
31406 break;
31407
31408 case V8HImode:
31409 use_vec_extr = TARGET_SSE2;
31410 break;
31411 case V4HImode:
31412 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31413 break;
31414
31415 case V16QImode:
31416 use_vec_extr = TARGET_SSE4_1;
31417 break;
31418
31419 case V8QImode:
31420 /* ??? Could extract the appropriate HImode element and shift. */
31421 default:
31422 break;
31423 }
31424
31425 if (use_vec_extr)
31426 {
31427 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31428 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31429
31430 /* Let the rtl optimizers know about the zero extension performed. */
31431 if (inner_mode == QImode || inner_mode == HImode)
31432 {
31433 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31434 target = gen_lowpart (SImode, target);
31435 }
31436
31437 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31438 }
31439 else
31440 {
31441 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31442
31443 emit_move_insn (mem, vec);
31444
31445 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31446 emit_move_insn (target, tmp);
31447 }
31448 }
31449
31450 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31451 pattern to reduce; DEST is the destination; IN is the input vector. */
31452
31453 void
31454 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31455 {
31456 rtx tmp1, tmp2, tmp3;
31457
31458 tmp1 = gen_reg_rtx (V4SFmode);
31459 tmp2 = gen_reg_rtx (V4SFmode);
31460 tmp3 = gen_reg_rtx (V4SFmode);
31461
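/* Reduce the four elements in two steps: MOVHLPS brings elements 2 and 3
   down next to 0 and 1, FN combines the two pairs, then a shuffle pairs
   the two partial results so the final FN leaves the full reduction in
   element 0 of DEST.  */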
31462 emit_insn (gen_sse_movhlps (tmp1, in, in));
31463 emit_insn (fn (tmp2, tmp1, in));
31464
31465 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31466 const1_rtx, const1_rtx,
31467 GEN_INT (1+4), GEN_INT (1+4)));
31468 emit_insn (fn (dest, tmp2, tmp3));
31469 }
31470 \f
31471 /* Target hook for scalar_mode_supported_p. */
31472 static bool
31473 ix86_scalar_mode_supported_p (enum machine_mode mode)
31474 {
31475 if (DECIMAL_FLOAT_MODE_P (mode))
31476 return default_decimal_float_supported_p ();
31477 else if (mode == TFmode)
31478 return true;
31479 else
31480 return default_scalar_mode_supported_p (mode);
31481 }
31482
31483 /* Implements target hook vector_mode_supported_p. */
31484 static bool
31485 ix86_vector_mode_supported_p (enum machine_mode mode)
31486 {
31487 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31488 return true;
31489 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31490 return true;
31491 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31492 return true;
31493 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31494 return true;
31495 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31496 return true;
31497 return false;
31498 }
31499
31500 /* Target hook for c_mode_for_suffix. */
31501 static enum machine_mode
31502 ix86_c_mode_for_suffix (char suffix)
31503 {
31504 if (suffix == 'q')
31505 return TFmode;
31506 if (suffix == 'w')
31507 return XFmode;
31508
31509 return VOIDmode;
31510 }
31511
31512 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31513
31514 We do this in the new i386 backend to maintain source compatibility
31515 with the old cc0-based compiler. */
31516
31517 static tree
31518 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31519 tree inputs ATTRIBUTE_UNUSED,
31520 tree clobbers)
31521 {
31522 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31523 clobbers);
31524 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31525 clobbers);
31526 return clobbers;
31527 }
31528
31529 /* Implements target vector targetm.asm.encode_section_info. */
31530
31531 static void ATTRIBUTE_UNUSED
31532 ix86_encode_section_info (tree decl, rtx rtl, int first)
31533 {
31534 default_encode_section_info (decl, rtl, first);
31535
31536 if (TREE_CODE (decl) == VAR_DECL
31537 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31538 && ix86_in_large_data_p (decl))
31539 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31540 }
31541
31542 /* Worker function for REVERSE_CONDITION. */
31543
31544 enum rtx_code
31545 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31546 {
31547 return (mode != CCFPmode && mode != CCFPUmode
31548 ? reverse_condition (code)
31549 : reverse_condition_maybe_unordered (code));
31550 }
31551
31552 /* Output code to perform an x87 FP register move, from OPERANDS[1]
31553 to OPERANDS[0]. */
31554
31555 const char *
31556 output_387_reg_move (rtx insn, rtx *operands)
31557 {
31558 if (REG_P (operands[0]))
31559 {
31560 if (REG_P (operands[1])
31561 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31562 {
31563 if (REGNO (operands[0]) == FIRST_STACK_REG)
31564 return output_387_ffreep (operands, 0);
31565 return "fstp\t%y0";
31566 }
31567 if (STACK_TOP_P (operands[0]))
31568 return "fld%Z1\t%y1";
31569 return "fst\t%y0";
31570 }
31571 else if (MEM_P (operands[0]))
31572 {
31573 gcc_assert (REG_P (operands[1]));
31574 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31575 return "fstp%Z0\t%y0";
31576 else
31577 {
31578 /* There is no non-popping store to memory for XFmode.
31579 So if we need one, follow the store with a load. */
31580 if (GET_MODE (operands[0]) == XFmode)
31581 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31582 else
31583 return "fst%Z0\t%y0";
31584 }
31585 }
31586 else
31587 gcc_unreachable();
31588 }
31589
31590 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
31591 the FP status register is set. */
31592
31593 void
31594 ix86_emit_fp_unordered_jump (rtx label)
31595 {
31596 rtx reg = gen_reg_rtx (HImode);
31597 rtx temp;
31598
31599 emit_insn (gen_x86_fnstsw_1 (reg));
31600
31601 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31602 {
31603 emit_insn (gen_x86_sahf_1 (reg));
31604
31605 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31606 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31607 }
31608 else
31609 {
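/* No usable SAHF: test the C2 bit directly. The mask 0x04 selects bit 2
   of the high byte of the FNSTSW result, which is C2 (bit 10 of the FP
   status word).  */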
31610 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31611
31612 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31613 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31614 }
31615
31616 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31617 gen_rtx_LABEL_REF (VOIDmode, label),
31618 pc_rtx);
31619 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31620
31621 emit_jump_insn (temp);
31622 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31623 }
31624
31625 /* Output code to perform a log1p XFmode calculation. */
31626
31627 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31628 {
31629 rtx label1 = gen_label_rtx ();
31630 rtx label2 = gen_label_rtx ();
31631
31632 rtx tmp = gen_reg_rtx (XFmode);
31633 rtx tmp2 = gen_reg_rtx (XFmode);
31634 rtx test;
31635
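/* FYL2XP1 is only specified for |op1| < 1 - sqrt(2)/2 (about 0.2929);
   for larger magnitudes branch to label1 and compute log1p as
   fyl2x (1 + op1) instead.  */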
31636 emit_insn (gen_absxf2 (tmp, op1));
31637 test = gen_rtx_GE (VOIDmode, tmp,
31638 CONST_DOUBLE_FROM_REAL_VALUE (
31639 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31640 XFmode));
31641 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31642
31643 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31644 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31645 emit_jump (label2);
31646
31647 emit_label (label1);
31648 emit_move_insn (tmp, CONST1_RTX (XFmode));
31649 emit_insn (gen_addxf3 (tmp, op1, tmp));
31650 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31651 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31652
31653 emit_label (label2);
31654 }
31655
31656 /* Output code to perform a Newton-Raphson approximation of a single precision
31657 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31658
31659 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31660 {
31661 rtx x0, x1, e0, e1;
31662
31663 x0 = gen_reg_rtx (mode);
31664 e0 = gen_reg_rtx (mode);
31665 e1 = gen_reg_rtx (mode);
31666 x1 = gen_reg_rtx (mode);
31667
31668 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
31669
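/* This is one Newton-Raphson refinement of the RCP estimate,
   x1 = 2*x0 - b*x0*x0 (= x0 * (2 - b*x0)), which roughly doubles the
   number of correct bits; the quotient is then a * x1.  */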
31670 /* x0 = rcp(b) estimate */
31671 emit_insn (gen_rtx_SET (VOIDmode, x0,
31672 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31673 UNSPEC_RCP)));
31674 /* e0 = x0 * b */
31675 emit_insn (gen_rtx_SET (VOIDmode, e0,
31676 gen_rtx_MULT (mode, x0, b)));
31677
31678 /* e0 = x0 * e0 */
31679 emit_insn (gen_rtx_SET (VOIDmode, e0,
31680 gen_rtx_MULT (mode, x0, e0)));
31681
31682 /* e1 = x0 + x0 */
31683 emit_insn (gen_rtx_SET (VOIDmode, e1,
31684 gen_rtx_PLUS (mode, x0, x0)));
31685
31686 /* x1 = e1 - e0 */
31687 emit_insn (gen_rtx_SET (VOIDmode, x1,
31688 gen_rtx_MINUS (mode, e1, e0)));
31689
31690 /* res = a * x1 */
31691 emit_insn (gen_rtx_SET (VOIDmode, res,
31692 gen_rtx_MULT (mode, a, x1)));
31693 }
31694
31695 /* Output code to perform a Newton-Raphson approximation of a
31696 single precision floating point [reciprocal] square root. */
31697
31698 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31699 bool recip)
31700 {
31701 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31702 REAL_VALUE_TYPE r;
31703
31704 x0 = gen_reg_rtx (mode);
31705 e0 = gen_reg_rtx (mode);
31706 e1 = gen_reg_rtx (mode);
31707 e2 = gen_reg_rtx (mode);
31708 e3 = gen_reg_rtx (mode);
31709
31710 real_from_integer (&r, VOIDmode, -3, -1, 0);
31711 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31712
31713 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31714 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31715
31716 if (VECTOR_MODE_P (mode))
31717 {
31718 mthree = ix86_build_const_vector (mode, true, mthree);
31719 mhalf = ix86_build_const_vector (mode, true, mhalf);
31720 }
31721
31722 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31723 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
31724
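/* Both expansions are one Newton-Raphson refinement of the RSQRT estimate,
   x1 = x0 * (1.5 - 0.5 * a * x0 * x0), rewritten so that only the
   constants -3.0 and -0.5 need to be materialized.  */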
31725 /* x0 = rsqrt(a) estimate */
31726 emit_insn (gen_rtx_SET (VOIDmode, x0,
31727 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
31728 UNSPEC_RSQRT)));
31729
31730 /* If a == 0.0, filter out the infinite RSQRT estimate to prevent a NaN result for sqrt (0.0). */
31731 if (!recip)
31732 {
31733 rtx zero, mask;
31734
31735 zero = gen_reg_rtx (mode);
31736 mask = gen_reg_rtx (mode);
31737
31738 zero = force_reg (mode, CONST0_RTX(mode));
31739 emit_insn (gen_rtx_SET (VOIDmode, mask,
31740 gen_rtx_NE (mode, zero, a)));
31741
31742 emit_insn (gen_rtx_SET (VOIDmode, x0,
31743 gen_rtx_AND (mode, x0, mask)));
31744 }
31745
31746 /* e0 = x0 * a */
31747 emit_insn (gen_rtx_SET (VOIDmode, e0,
31748 gen_rtx_MULT (mode, x0, a)));
31749 /* e1 = e0 * x0 */
31750 emit_insn (gen_rtx_SET (VOIDmode, e1,
31751 gen_rtx_MULT (mode, e0, x0)));
31752
31753 /* e2 = e1 - 3. */
31754 mthree = force_reg (mode, mthree);
31755 emit_insn (gen_rtx_SET (VOIDmode, e2,
31756 gen_rtx_PLUS (mode, e1, mthree)));
31757
31758 mhalf = force_reg (mode, mhalf);
31759 if (recip)
31760 /* e3 = -.5 * x0 */
31761 emit_insn (gen_rtx_SET (VOIDmode, e3,
31762 gen_rtx_MULT (mode, x0, mhalf)));
31763 else
31764 /* e3 = -.5 * e0 */
31765 emit_insn (gen_rtx_SET (VOIDmode, e3,
31766 gen_rtx_MULT (mode, e0, mhalf)));
31767 /* ret = e2 * e3 */
31768 emit_insn (gen_rtx_SET (VOIDmode, res,
31769 gen_rtx_MULT (mode, e2, e3)));
31770 }
31771
31772 #ifdef TARGET_SOLARIS
31773 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
31774
31775 static void
31776 i386_solaris_elf_named_section (const char *name, unsigned int flags,
31777 tree decl)
31778 {
31779 /* With Binutils 2.15, the "@unwind" marker must be specified on
31780 every occurrence of the ".eh_frame" section, not just the first
31781 one. */
31782 if (TARGET_64BIT
31783 && strcmp (name, ".eh_frame") == 0)
31784 {
31785 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
31786 flags & SECTION_WRITE ? "aw" : "a");
31787 return;
31788 }
31789
31790 #ifndef USE_GAS
31791 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
31792 {
31793 solaris_elf_asm_comdat_section (name, flags, decl);
31794 return;
31795 }
31796 #endif
31797
31798 default_elf_asm_named_section (name, flags, decl);
31799 }
31800 #endif /* TARGET_SOLARIS */
31801
31802 /* Return the mangling of TYPE if it is an extended fundamental type. */
31803
31804 static const char *
31805 ix86_mangle_type (const_tree type)
31806 {
31807 type = TYPE_MAIN_VARIANT (type);
31808
31809 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31810 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31811 return NULL;
31812
31813 switch (TYPE_MODE (type))
31814 {
31815 case TFmode:
31816 /* __float128 is "g". */
31817 return "g";
31818 case XFmode:
31819 /* "long double" or __float80 is "e". */
31820 return "e";
31821 default:
31822 return NULL;
31823 }
31824 }
31825
31826 /* For 32-bit code we can save PIC register setup by using
31827 __stack_chk_fail_local hidden function instead of calling
31828 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
31829 register, so it is better to call __stack_chk_fail directly. */
31830
31831 static tree ATTRIBUTE_UNUSED
31832 ix86_stack_protect_fail (void)
31833 {
31834 return TARGET_64BIT
31835 ? default_external_stack_protect_fail ()
31836 : default_hidden_stack_protect_fail ();
31837 }
31838
31839 /* Select a format to encode pointers in exception handling data. CODE
31840 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
31841 true if the symbol may be affected by dynamic relocations.
31842
31843 ??? All x86 object file formats are capable of representing this.
31844 After all, the relocation needed is the same as for the call insn.
31845 Whether or not a particular assembler allows us to enter such, I
31846 guess we'll have to see. */
31847 int
31848 asm_preferred_eh_data_format (int code, int global)
31849 {
31850 if (flag_pic)
31851 {
31852 int type = DW_EH_PE_sdata8;
31853 if (!TARGET_64BIT
31854 || ix86_cmodel == CM_SMALL_PIC
31855 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
31856 type = DW_EH_PE_sdata4;
31857 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
31858 }
31859 if (ix86_cmodel == CM_SMALL
31860 || (ix86_cmodel == CM_MEDIUM && code))
31861 return DW_EH_PE_udata4;
31862 return DW_EH_PE_absptr;
31863 }
31864 \f
31865 /* Expand copysign from SIGN to the positive value ABS_VALUE
31866 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
31867 the sign-bit. */
31868 static void
31869 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
31870 {
31871 enum machine_mode mode = GET_MODE (sign);
31872 rtx sgn = gen_reg_rtx (mode);
31873 if (mask == NULL_RTX)
31874 {
31875 enum machine_mode vmode;
31876
31877 if (mode == SFmode)
31878 vmode = V4SFmode;
31879 else if (mode == DFmode)
31880 vmode = V2DFmode;
31881 else
31882 vmode = mode;
31883
31884 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
31885 if (!VECTOR_MODE_P (mode))
31886 {
31887 /* We need to generate a scalar mode mask in this case. */
31888 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31889 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31890 mask = gen_reg_rtx (mode);
31891 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31892 }
31893 }
31894 else
31895 mask = gen_rtx_NOT (mode, mask);
31896 emit_insn (gen_rtx_SET (VOIDmode, sgn,
31897 gen_rtx_AND (mode, mask, sign)));
31898 emit_insn (gen_rtx_SET (VOIDmode, result,
31899 gen_rtx_IOR (mode, abs_value, sgn)));
31900 }
31901
31902 /* Expand fabs (OP0) and return a new rtx that holds the result. The
31903 mask for masking out the sign-bit is stored in *SMASK, if that is
31904 non-null. */
31905 static rtx
31906 ix86_expand_sse_fabs (rtx op0, rtx *smask)
31907 {
31908 enum machine_mode vmode, mode = GET_MODE (op0);
31909 rtx xa, mask;
31910
31911 xa = gen_reg_rtx (mode);
31912 if (mode == SFmode)
31913 vmode = V4SFmode;
31914 else if (mode == DFmode)
31915 vmode = V2DFmode;
31916 else
31917 vmode = mode;
31918 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
31919 if (!VECTOR_MODE_P (mode))
31920 {
31921 /* We need to generate a scalar mode mask in this case. */
31922 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31923 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31924 mask = gen_reg_rtx (mode);
31925 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31926 }
31927 emit_insn (gen_rtx_SET (VOIDmode, xa,
31928 gen_rtx_AND (mode, op0, mask)));
31929
31930 if (smask)
31931 *smask = mask;
31932
31933 return xa;
31934 }
31935
31936 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
31937 swapping the operands if SWAP_OPERANDS is true. The expanded
31938 code is a forward jump to a newly created label in case the
31939 comparison is true. The generated label rtx is returned. */
31940 static rtx
31941 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
31942 bool swap_operands)
31943 {
31944 rtx label, tmp;
31945
31946 if (swap_operands)
31947 {
31948 tmp = op0;
31949 op0 = op1;
31950 op1 = tmp;
31951 }
31952
31953 label = gen_label_rtx ();
31954 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
31955 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31956 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
31957 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
31958 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
31959 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
31960 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
31961 JUMP_LABEL (tmp) = label;
31962
31963 return label;
31964 }
31965
31966 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
31967 using comparison code CODE. Operands are swapped for the comparison if
31968 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
31969 static rtx
31970 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
31971 bool swap_operands)
31972 {
31973 rtx (*insn)(rtx, rtx, rtx, rtx);
31974 enum machine_mode mode = GET_MODE (op0);
31975 rtx mask = gen_reg_rtx (mode);
31976
31977 if (swap_operands)
31978 {
31979 rtx tmp = op0;
31980 op0 = op1;
31981 op1 = tmp;
31982 }
31983
31984 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
31985
31986 emit_insn (insn (mask, op0, op1,
31987 gen_rtx_fmt_ee (code, mode, op0, op1)));
31988 return mask;
31989 }
31990
31991 /* Generate and return a rtx of mode MODE for 2**n where n is the number
31992 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
31993 static rtx
31994 ix86_gen_TWO52 (enum machine_mode mode)
31995 {
31996 REAL_VALUE_TYPE TWO52r;
31997 rtx TWO52;
31998
31999 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
32000 TWO52 = const_double_from_real_value (TWO52r, mode);
32001 TWO52 = force_reg (mode, TWO52);
32002
32003 return TWO52;
32004 }
32005
32006 /* Expand SSE sequence for computing lround from OP1 storing
32007 into OP0. */
32008 void
32009 ix86_expand_lround (rtx op0, rtx op1)
32010 {
32011 /* C code for the stuff we're doing below:
32012 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
32013 return (long)tmp;
32014 */
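/* Using nextafter (0.5, 0.0) rather than 0.5 itself keeps an operand just
   below a half-way point from being rounded up to the integer boundary by
   the addition and then truncated to the wrong result, while exact
   half-way values still round away from zero.  */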
32015 enum machine_mode mode = GET_MODE (op1);
32016 const struct real_format *fmt;
32017 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32018 rtx adj;
32019
32020 /* load nextafter (0.5, 0.0) */
32021 fmt = REAL_MODE_FORMAT (mode);
32022 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32023 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32024
32025 /* adj = copysign (0.5, op1) */
32026 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32027 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32028
32029 /* adj = op1 + adj */
32030 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32031
32032 /* op0 = (imode)adj */
32033 expand_fix (op0, adj, 0);
32034 }
32035
32036 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
32037 into OPERAND0. */
32038 void
32039 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
32040 {
32041 /* C code for the stuff we're doing below (for do_floor):
32042 xi = (long)op1;
32043 xi -= (double)xi > op1 ? 1 : 0;
32044 return xi;
32045 */
32046 enum machine_mode fmode = GET_MODE (op1);
32047 enum machine_mode imode = GET_MODE (op0);
32048 rtx ireg, freg, label, tmp;
32049
32050 /* reg = (long)op1 */
32051 ireg = gen_reg_rtx (imode);
32052 expand_fix (ireg, op1, 0);
32053
32054 /* freg = (double)reg */
32055 freg = gen_reg_rtx (fmode);
32056 expand_float (freg, ireg, 0);
32057
32058 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32059 label = ix86_expand_sse_compare_and_jump (UNLE,
32060 freg, op1, !do_floor);
32061 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32062 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32063 emit_move_insn (ireg, tmp);
32064
32065 emit_label (label);
32066 LABEL_NUSES (label) = 1;
32067
32068 emit_move_insn (op0, ireg);
32069 }
32070
32071 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32072 result in OPERAND0. */
32073 void
32074 ix86_expand_rint (rtx operand0, rtx operand1)
32075 {
32076 /* C code for the stuff we're doing below:
32077 xa = fabs (operand1);
32078 if (!isless (xa, 2**52))
32079 return operand1;
32080 xa = xa + 2**52 - 2**52;
32081 return copysign (xa, operand1);
32082 */
32083 enum machine_mode mode = GET_MODE (operand0);
32084 rtx res, xa, label, TWO52, mask;
32085
32086 res = gen_reg_rtx (mode);
32087 emit_move_insn (res, operand1);
32088
32089 /* xa = abs (operand1) */
32090 xa = ix86_expand_sse_fabs (res, &mask);
32091
32092 /* if (!isless (xa, TWO52)) goto label; */
32093 TWO52 = ix86_gen_TWO52 (mode);
32094 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32095
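/* Adding and then subtracting 2**52 (2**23 for SFmode) rounds xa to an
   integer in the current rounding mode: the intermediate sum has no bits
   below the binary point, and the subtraction is exact.  */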
32096 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32097 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32098
32099 ix86_sse_copysign_to_positive (res, xa, res, mask);
32100
32101 emit_label (label);
32102 LABEL_NUSES (label) = 1;
32103
32104 emit_move_insn (operand0, res);
32105 }
32106
32107 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32108 into OPERAND0. */
32109 void
32110 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32111 {
32112 /* C code for the stuff we expand below.
32113 double xa = fabs (x), x2;
32114 if (!isless (xa, TWO52))
32115 return x;
32116 xa = xa + TWO52 - TWO52;
32117 x2 = copysign (xa, x);
32118 Compensate. Floor:
32119 if (x2 > x)
32120 x2 -= 1;
32121 Compensate. Ceil:
32122 if (x2 < x)
32123 x2 -= -1;
32124 return x2;
32125 */
32126 enum machine_mode mode = GET_MODE (operand0);
32127 rtx xa, TWO52, tmp, label, one, res, mask;
32128
32129 TWO52 = ix86_gen_TWO52 (mode);
32130
32131 /* Temporary for holding the result, initialized to the input
32132 operand to ease control flow. */
32133 res = gen_reg_rtx (mode);
32134 emit_move_insn (res, operand1);
32135
32136 /* xa = abs (operand1) */
32137 xa = ix86_expand_sse_fabs (res, &mask);
32138
32139 /* if (!isless (xa, TWO52)) goto label; */
32140 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32141
32142 /* xa = xa + TWO52 - TWO52; */
32143 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32144 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32145
32146 /* xa = copysign (xa, operand1) */
32147 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32148
32149 /* generate 1.0 or -1.0 */
32150 one = force_reg (mode,
32151 const_double_from_real_value (do_floor
32152 ? dconst1 : dconstm1, mode));
32153
32154 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
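/* The comparison mask is all-ones or all-zeros, so ANDing it with ONE
   below yields the +/-1.0 or 0.0 adjustment.  */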
32155 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32156 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32157 gen_rtx_AND (mode, one, tmp)));
32158 /* We always need to subtract here to preserve signed zero. */
32159 tmp = expand_simple_binop (mode, MINUS,
32160 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32161 emit_move_insn (res, tmp);
32162
32163 emit_label (label);
32164 LABEL_NUSES (label) = 1;
32165
32166 emit_move_insn (operand0, res);
32167 }
32168
32169 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32170 into OPERAND0. */
32171 void
32172 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32173 {
32174 /* C code for the stuff we expand below.
32175 double xa = fabs (x), x2;
32176 if (!isless (xa, TWO52))
32177 return x;
32178 x2 = (double)(long)x;
32179 Compensate. Floor:
32180 if (x2 > x)
32181 x2 -= 1;
32182 Compensate. Ceil:
32183 if (x2 < x)
32184 x2 += 1;
32185 if (HONOR_SIGNED_ZEROS (mode))
32186 return copysign (x2, x);
32187 return x2;
32188 */
32189 enum machine_mode mode = GET_MODE (operand0);
32190 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32191
32192 TWO52 = ix86_gen_TWO52 (mode);
32193
32194 /* Temporary for holding the result, initialized to the input
32195 operand to ease control flow. */
32196 res = gen_reg_rtx (mode);
32197 emit_move_insn (res, operand1);
32198
32199 /* xa = abs (operand1) */
32200 xa = ix86_expand_sse_fabs (res, &mask);
32201
32202 /* if (!isless (xa, TWO52)) goto label; */
32203 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32204
32205 /* xa = (double)(long)x */
32206 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32207 expand_fix (xi, res, 0);
32208 expand_float (xa, xi, 0);
32209
32210 /* generate 1.0 */
32211 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32212
32213 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32214 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32215 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32216 gen_rtx_AND (mode, one, tmp)));
32217 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32218 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32219 emit_move_insn (res, tmp);
32220
32221 if (HONOR_SIGNED_ZEROS (mode))
32222 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32223
32224 emit_label (label);
32225 LABEL_NUSES (label) = 1;
32226
32227 emit_move_insn (operand0, res);
32228 }
32229
32230 /* Expand SSE sequence for computing round from OPERAND1 storing
32231 into OPERAND0. Sequence that works without relying on DImode truncation
32232 via cvttsd2siq that is only available on 64bit targets. */
32233 void
32234 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32235 {
32236 /* C code for the stuff we expand below.
32237 double xa = fabs (x), xa2, x2;
32238 if (!isless (xa, TWO52))
32239 return x;
32240 Using the absolute value and copying back sign makes
32241 -0.0 -> -0.0 correct.
32242 xa2 = xa + TWO52 - TWO52;
32243 Compensate.
32244 dxa = xa2 - xa;
32245 if (dxa <= -0.5)
32246 xa2 += 1;
32247 else if (dxa > 0.5)
32248 xa2 -= 1;
32249 x2 = copysign (xa2, x);
32250 return x2;
32251 */
32252 enum machine_mode mode = GET_MODE (operand0);
32253 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32254
32255 TWO52 = ix86_gen_TWO52 (mode);
32256
32257 /* Temporary for holding the result, initialized to the input
32258 operand to ease control flow. */
32259 res = gen_reg_rtx (mode);
32260 emit_move_insn (res, operand1);
32261
32262 /* xa = abs (operand1) */
32263 xa = ix86_expand_sse_fabs (res, &mask);
32264
32265 /* if (!isless (xa, TWO52)) goto label; */
32266 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32267
32268 /* xa2 = xa + TWO52 - TWO52; */
32269 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32270 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32271
32272 /* dxa = xa2 - xa; */
32273 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32274
32275 /* generate 0.5, 1.0 and -0.5 */
32276 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32277 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32278 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32279 0, OPTAB_DIRECT);
32280
32281 /* Compensate. */
32282 tmp = gen_reg_rtx (mode);
32283 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32284 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32285 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32286 gen_rtx_AND (mode, one, tmp)));
32287 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32288 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32289 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32290 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32291 gen_rtx_AND (mode, one, tmp)));
32292 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32293
32294 /* res = copysign (xa2, operand1) */
32295 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32296
32297 emit_label (label);
32298 LABEL_NUSES (label) = 1;
32299
32300 emit_move_insn (operand0, res);
32301 }
32302
32303 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32304 into OPERAND0. */
32305 void
32306 ix86_expand_trunc (rtx operand0, rtx operand1)
32307 {
32308 /* C code for SSE variant we expand below.
32309 double xa = fabs (x), x2;
32310 if (!isless (xa, TWO52))
32311 return x;
32312 x2 = (double)(long)x;
32313 if (HONOR_SIGNED_ZEROS (mode))
32314 return copysign (x2, x);
32315 return x2;
32316 */
32317 enum machine_mode mode = GET_MODE (operand0);
32318 rtx xa, xi, TWO52, label, res, mask;
32319
32320 TWO52 = ix86_gen_TWO52 (mode);
32321
32322 /* Temporary for holding the result, initialized to the input
32323 operand to ease control flow. */
32324 res = gen_reg_rtx (mode);
32325 emit_move_insn (res, operand1);
32326
32327 /* xa = abs (operand1) */
32328 xa = ix86_expand_sse_fabs (res, &mask);
32329
32330 /* if (!isless (xa, TWO52)) goto label; */
32331 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32332
32333 /* x = (double)(long)x */
32334 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32335 expand_fix (xi, res, 0);
32336 expand_float (res, xi, 0);
32337
32338 if (HONOR_SIGNED_ZEROS (mode))
32339 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32340
32341 emit_label (label);
32342 LABEL_NUSES (label) = 1;
32343
32344 emit_move_insn (operand0, res);
32345 }
32346
32347 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32348 into OPERAND0. */
32349 void
32350 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32351 {
32352 enum machine_mode mode = GET_MODE (operand0);
32353 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32354
32355 /* C code for SSE variant we expand below.
32356 double xa = fabs (x), x2;
32357 if (!isless (xa, TWO52))
32358 return x;
32359 xa2 = xa + TWO52 - TWO52;
32360 Compensate:
32361 if (xa2 > xa)
32362 xa2 -= 1.0;
32363 x2 = copysign (xa2, x);
32364 return x2;
32365 */
32366
32367 TWO52 = ix86_gen_TWO52 (mode);
32368
32369 /* Temporary for holding the result, initialized to the input
32370 operand to ease control flow. */
32371 res = gen_reg_rtx (mode);
32372 emit_move_insn (res, operand1);
32373
32374 /* xa = abs (operand1) */
32375 xa = ix86_expand_sse_fabs (res, &smask);
32376
32377 /* if (!isless (xa, TWO52)) goto label; */
32378 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32379
32380 /* res = xa + TWO52 - TWO52; */
32381 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32382 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32383 emit_move_insn (res, tmp);
32384
32385 /* generate 1.0 */
32386 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32387
32388 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32389 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32390 emit_insn (gen_rtx_SET (VOIDmode, mask,
32391 gen_rtx_AND (mode, mask, one)));
32392 tmp = expand_simple_binop (mode, MINUS,
32393 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32394 emit_move_insn (res, tmp);
32395
32396 /* res = copysign (res, operand1) */
32397 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32398
32399 emit_label (label);
32400 LABEL_NUSES (label) = 1;
32401
32402 emit_move_insn (operand0, res);
32403 }
32404
32405 /* Expand SSE sequence for computing round from OPERAND1 storing
32406 into OPERAND0. */
32407 void
32408 ix86_expand_round (rtx operand0, rtx operand1)
32409 {
32410 /* C code for the stuff we're doing below:
32411 double xa = fabs (x);
32412 if (!isless (xa, TWO52))
32413 return x;
32414 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32415 return copysign (xa, x);
32416 */
32417 enum machine_mode mode = GET_MODE (operand0);
32418 rtx res, TWO52, xa, label, xi, half, mask;
32419 const struct real_format *fmt;
32420 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32421
32422 /* Temporary for holding the result, initialized to the input
32423 operand to ease control flow. */
32424 res = gen_reg_rtx (mode);
32425 emit_move_insn (res, operand1);
32426
32427 TWO52 = ix86_gen_TWO52 (mode);
32428 xa = ix86_expand_sse_fabs (res, &mask);
32429 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32430
32431 /* load nextafter (0.5, 0.0) */
32432 fmt = REAL_MODE_FORMAT (mode);
32433 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32434 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
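  /* pred_half is the largest representable value below 0.5, i.e.
     0.5 - 2**(-p-1) where p is the precision of the mode (53 for DFmode).
     Using it instead of 0.5 avoids double rounding for inputs just below
     one half: e.g. for x = 0.5 - 2**-54, x + 0.5 would round up to 1.0 and
     the truncation below would yield 1 instead of 0, whereas x + pred_half
     stays strictly below 1.0.  */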
32435
32436 /* xa = xa + 0.5 */
32437 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32438 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32439
32440 /* xa = (double)(int64_t)xa */
32441 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32442 expand_fix (xi, xa, 0);
32443 expand_float (xa, xi, 0);
32444
32445 /* res = copysign (xa, operand1) */
32446 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32447
32448 emit_label (label);
32449 LABEL_NUSES (label) = 1;
32450
32451 emit_move_insn (operand0, res);
32452 }
32453 \f
32454
32455 /* Table of valid machine attributes. */
32456 static const struct attribute_spec ix86_attribute_table[] =
32457 {
32458 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
32459 affects_type_identity } */
32460 /* Stdcall attribute says callee is responsible for popping arguments
32461 if they are not variable. */
32462 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32463 true },
32464 /* Fastcall attribute says callee is responsible for popping arguments
32465 if they are not variable. */
32466 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32467 true },
32468 /* Thiscall attribute says callee is responsible for popping arguments
32469 if they are not variable. */
32470 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32471 true },
32472 /* Cdecl attribute says the callee is a normal C declaration. */
32473 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32474 true },
32475 /* Regparm attribute specifies how many integer arguments are to be
32476 passed in registers. */
32477 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
32478 true },
32479 /* Sseregparm attribute says we are using x86_64 calling conventions
32480 for FP arguments. */
32481 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32482 true },
32483 /* force_align_arg_pointer says this function realigns the stack at entry. */
32484 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32485 false, true, true, ix86_handle_cconv_attribute, false },
32486 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32487 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
32488 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
32489 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
32490 false },
32491 #endif
32492 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32493 false },
32494 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32495 false },
32496 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32497 SUBTARGET_ATTRIBUTE_TABLE,
32498 #endif
32499 /* ms_abi and sysv_abi calling convention function attributes. */
32500 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32501 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32502 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
32503 false },
32504 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32505 ix86_handle_callee_pop_aggregate_return, true },
32506 /* End element. */
32507 { NULL, 0, 0, false, false, false, NULL, false }
32508 };
32509
32510 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32511 static int
32512 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32513 tree vectype ATTRIBUTE_UNUSED,
32514 int misalign ATTRIBUTE_UNUSED)
32515 {
32516 switch (type_of_cost)
32517 {
32518 case scalar_stmt:
32519 return ix86_cost->scalar_stmt_cost;
32520
32521 case scalar_load:
32522 return ix86_cost->scalar_load_cost;
32523
32524 case scalar_store:
32525 return ix86_cost->scalar_store_cost;
32526
32527 case vector_stmt:
32528 return ix86_cost->vec_stmt_cost;
32529
32530 case vector_load:
32531 return ix86_cost->vec_align_load_cost;
32532
32533 case vector_store:
32534 return ix86_cost->vec_store_cost;
32535
32536 case vec_to_scalar:
32537 return ix86_cost->vec_to_scalar_cost;
32538
32539 case scalar_to_vec:
32540 return ix86_cost->scalar_to_vec_cost;
32541
32542 case unaligned_load:
32543 case unaligned_store:
32544 return ix86_cost->vec_unalign_load_cost;
32545
32546 case cond_branch_taken:
32547 return ix86_cost->cond_taken_branch_cost;
32548
32549 case cond_branch_not_taken:
32550 return ix86_cost->cond_not_taken_branch_cost;
32551
32552 case vec_perm:
32553 return 1;
32554
32555 default:
32556 gcc_unreachable ();
32557 }
32558 }
32559
32560
32561 /* Implement targetm.vectorize.builtin_vec_perm. */
32562
32563 static tree
32564 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32565 {
32566 tree itype = TREE_TYPE (vec_type);
32567 bool u = TYPE_UNSIGNED (itype);
32568 enum machine_mode vmode = TYPE_MODE (vec_type);
32569 enum ix86_builtins fcode;
32570 bool ok = TARGET_SSE2;
32571
32572 switch (vmode)
32573 {
32574 case V4DFmode:
32575 ok = TARGET_AVX;
32576 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32577 goto get_di;
32578 case V2DFmode:
32579 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32580 get_di:
32581 itype = ix86_get_builtin_type (IX86_BT_DI);
32582 break;
32583
32584 case V8SFmode:
32585 ok = TARGET_AVX;
32586 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32587 goto get_si;
32588 case V4SFmode:
32589 ok = TARGET_SSE;
32590 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32591 get_si:
32592 itype = ix86_get_builtin_type (IX86_BT_SI);
32593 break;
32594
32595 case V2DImode:
32596 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32597 break;
32598 case V4SImode:
32599 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32600 break;
32601 case V8HImode:
32602 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32603 break;
32604 case V16QImode:
32605 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32606 break;
32607 default:
32608 ok = false;
32609 break;
32610 }
32611
32612 if (!ok)
32613 return NULL_TREE;
32614
32615 *mask_type = itype;
32616 return ix86_builtins[(int) fcode];
32617 }
32618
32619 /* Return a vector mode with twice as many elements as VMODE. */
32620 /* ??? Consider moving this to a table generated by genmodes.c. */
32621
32622 static enum machine_mode
32623 doublesize_vector_mode (enum machine_mode vmode)
32624 {
32625 switch (vmode)
32626 {
32627 case V2SFmode: return V4SFmode;
32628 case V1DImode: return V2DImode;
32629 case V2SImode: return V4SImode;
32630 case V4HImode: return V8HImode;
32631 case V8QImode: return V16QImode;
32632
32633 case V2DFmode: return V4DFmode;
32634 case V4SFmode: return V8SFmode;
32635 case V2DImode: return V4DImode;
32636 case V4SImode: return V8SImode;
32637 case V8HImode: return V16HImode;
32638 case V16QImode: return V32QImode;
32639
32640 case V4DFmode: return V8DFmode;
32641 case V8SFmode: return V16SFmode;
32642 case V4DImode: return V8DImode;
32643 case V8SImode: return V16SImode;
32644 case V16HImode: return V32HImode;
32645 case V32QImode: return V64QImode;
32646
32647 default:
32648 gcc_unreachable ();
32649 }
32650 }
32651
32652 /* Construct (set target (vec_select op0 (parallel perm))) and
32653 return true if that's a valid instruction in the active ISA. */
32654
32655 static bool
32656 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32657 {
32658 rtx rperm[MAX_VECT_LEN], x;
32659 unsigned i;
32660
32661 for (i = 0; i < nelt; ++i)
32662 rperm[i] = GEN_INT (perm[i]);
32663
32664 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32665 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32666 x = gen_rtx_SET (VOIDmode, target, x);
32667
32668 x = emit_insn (x);
32669 if (recog_memoized (x) < 0)
32670 {
32671 remove_insn (x);
32672 return false;
32673 }
32674 return true;
32675 }
32676
32677 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32678
32679 static bool
32680 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32681 const unsigned char *perm, unsigned nelt)
32682 {
32683 enum machine_mode v2mode;
32684 rtx x;
32685
32686 v2mode = doublesize_vector_mode (GET_MODE (op0));
32687 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32688 return expand_vselect (target, x, perm, nelt);
32689 }
32690
32691 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32692 in terms of blendp[sd] / pblendw / pblendvb. */
32693
32694 static bool
32695 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32696 {
32697 enum machine_mode vmode = d->vmode;
32698 unsigned i, mask, nelt = d->nelt;
32699 rtx target, op0, op1, x;
32700
32701 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32702 return false;
32703 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32704 return false;
32705
32706 /* This is a blend, not a permute. Elements must stay in their
32707 respective lanes. */
32708 for (i = 0; i < nelt; ++i)
32709 {
32710 unsigned e = d->perm[i];
32711 if (!(e == i || e == i + nelt))
32712 return false;
32713 }
32714
32715 if (d->testing_p)
32716 return true;
32717
32718 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32719 decision should be extracted elsewhere, so that we only try that
32720 sequence once all budget==3 options have been tried. */
32721
32722 /* For bytes, see if bytes move in pairs so we can use pblendw with
32723 an immediate argument, rather than pblendvb with a vector argument. */
32724 if (vmode == V16QImode)
32725 {
32726 bool pblendw_ok = true;
32727 for (i = 0; i < 16 && pblendw_ok; i += 2)
32728 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
32729
32730 if (!pblendw_ok)
32731 {
32732 rtx rperm[16], vperm;
32733
32734 for (i = 0; i < nelt; ++i)
32735 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
32736
32737 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32738 vperm = force_reg (V16QImode, vperm);
32739
32740 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
32741 return true;
32742 }
32743 }
32744
32745 target = d->target;
32746 op0 = d->op0;
32747 op1 = d->op1;
32748 mask = 0;
32749
32750 switch (vmode)
32751 {
32752 case V4DFmode:
32753 case V8SFmode:
32754 case V2DFmode:
32755 case V4SFmode:
32756 case V8HImode:
32757 for (i = 0; i < nelt; ++i)
32758 mask |= (d->perm[i] >= nelt) << i;
32759 break;
32760
32761 case V2DImode:
32762 for (i = 0; i < 2; ++i)
32763 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
32764 goto do_subreg;
32765
32766 case V4SImode:
32767 for (i = 0; i < 4; ++i)
32768 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
32769 goto do_subreg;
32770
32771 case V16QImode:
32772 for (i = 0; i < 8; ++i)
32773 mask |= (d->perm[i * 2] >= 16) << i;
32774
32775 do_subreg:
32776 vmode = V8HImode;
32777 target = gen_lowpart (vmode, target);
32778 op0 = gen_lowpart (vmode, op0);
32779 op1 = gen_lowpart (vmode, op1);
32780 break;
32781
32782 default:
32783 gcc_unreachable ();
32784 }
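  /* Example of the mask computed above: for V4SFmode with
     d->perm = { 0, 5, 6, 3 }, elements 1 and 2 come from op1, so
     mask = 0b0110 (6) and the vec_merge below matches the blendps
     pattern with that immediate.  For V2DImode/V4SImode/V16QImode the
     mask is widened to one bit per 16-bit word and the operands are
     viewed as V8HImode so a single pblendw pattern can match.  */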
32785
32786 /* This matches five different patterns with the different modes. */
32787 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
32788 x = gen_rtx_SET (VOIDmode, target, x);
32789 emit_insn (x);
32790
32791 return true;
32792 }
32793
32794 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32795 in terms of the variable form of vpermilps.
32796
32797 Note that we will have already failed the immediate input vpermilps,
32798 which requires that the high and low part shuffle be identical; the
32799 variable form doesn't require that. */
32800
32801 static bool
32802 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
32803 {
32804 rtx rperm[8], vperm;
32805 unsigned i;
32806
32807 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
32808 return false;
32809
32810 /* We can only permute within the 128-bit lane. */
32811 for (i = 0; i < 8; ++i)
32812 {
32813 unsigned e = d->perm[i];
32814 if (i < 4 ? e >= 4 : e < 4)
32815 return false;
32816 }
32817
32818 if (d->testing_p)
32819 return true;
32820
32821 for (i = 0; i < 8; ++i)
32822 {
32823 unsigned e = d->perm[i];
32824
32825 /* Within each 128-bit lane, the elements of op0 are numbered
32826 from 0 and the elements of op1 are numbered from 4. */
32827 if (e >= 8 + 4)
32828 e -= 8;
32829 else if (e >= 4)
32830 e -= 4;
32831
32832 rperm[i] = GEN_INT (e);
32833 }
32834
32835 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
32836 vperm = force_reg (V8SImode, vperm);
32837 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
32838
32839 return true;
32840 }
32841
32842 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32843 in terms of pshufb or vpperm. */
32844
32845 static bool
32846 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
32847 {
32848 unsigned i, nelt, eltsz;
32849 rtx rperm[16], vperm, target, op0, op1;
32850
32851 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
32852 return false;
32853 if (GET_MODE_SIZE (d->vmode) != 16)
32854 return false;
32855
32856 if (d->testing_p)
32857 return true;
32858
32859 nelt = d->nelt;
32860 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32861
32862 for (i = 0; i < nelt; ++i)
32863 {
32864 unsigned j, e = d->perm[i];
32865 for (j = 0; j < eltsz; ++j)
32866 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
32867 }
32868
32869 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32870 vperm = force_reg (V16QImode, vperm);
32871
32872 target = gen_lowpart (V16QImode, d->target);
32873 op0 = gen_lowpart (V16QImode, d->op0);
32874 if (d->op0 == d->op1)
32875 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
32876 else
32877 {
32878 op1 = gen_lowpart (V16QImode, d->op1);
32879 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
32880 }
32881
32882 return true;
32883 }
32884
32885 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
32886 in a single instruction. */
32887
32888 static bool
32889 expand_vec_perm_1 (struct expand_vec_perm_d *d)
32890 {
32891 unsigned i, nelt = d->nelt;
32892 unsigned char perm2[MAX_VECT_LEN];
32893
32894 /* Check plain VEC_SELECT first, because AVX has instructions that could
32895 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
32896 input where SEL+CONCAT may not. */
32897 if (d->op0 == d->op1)
32898 {
32899 int mask = nelt - 1;
32900
32901 for (i = 0; i < nelt; i++)
32902 perm2[i] = d->perm[i] & mask;
32903
32904 if (expand_vselect (d->target, d->op0, perm2, nelt))
32905 return true;
32906
32907 /* There are plenty of patterns in sse.md that are written for
32908 SEL+CONCAT and are not replicated for a single op. Perhaps
32909 that should be changed, to avoid the nastiness here. */
32910
32911 /* Recognize interleave style patterns, which means incrementing
32912 every other permutation operand. */
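      /* E.g. for V4SFmode with d->perm = { 0, 0, 1, 1 }, perm2 becomes
         { 0, 4, 1, 5 }, which selects from the concatenation of op0 with
         itself exactly as unpcklps does.  */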
32913 for (i = 0; i < nelt; i += 2)
32914 {
32915 perm2[i] = d->perm[i] & mask;
32916 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
32917 }
32918 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32919 return true;
32920
32921 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
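      /* E.g. for V4SFmode with d->perm = { 1, 3, 2, 0 }, perm2 becomes
         { 1, 3, 6, 4 }: the low two elements select from the first copy
         of op0 and the high two from the second copy, matching the shufps
         operand pattern.  */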
32922 if (nelt >= 4)
32923 {
32924 for (i = 0; i < nelt; i += 4)
32925 {
32926 perm2[i + 0] = d->perm[i + 0] & mask;
32927 perm2[i + 1] = d->perm[i + 1] & mask;
32928 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
32929 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
32930 }
32931
32932 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32933 return true;
32934 }
32935 }
32936
32937 /* Finally, try the fully general two operand permute. */
32938 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
32939 return true;
32940
32941 /* Recognize interleave style patterns with reversed operands. */
32942 if (d->op0 != d->op1)
32943 {
32944 for (i = 0; i < nelt; ++i)
32945 {
32946 unsigned e = d->perm[i];
32947 if (e >= nelt)
32948 e -= nelt;
32949 else
32950 e += nelt;
32951 perm2[i] = e;
32952 }
32953
32954 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
32955 return true;
32956 }
32957
32958 /* Try the SSE4.1 blend variable merge instructions. */
32959 if (expand_vec_perm_blend (d))
32960 return true;
32961
32962 /* Try one of the AVX vpermil variable permutations. */
32963 if (expand_vec_perm_vpermil (d))
32964 return true;
32965
32966 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
32967 if (expand_vec_perm_pshufb (d))
32968 return true;
32969
32970 return false;
32971 }
32972
32973 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32974 in terms of a pair of pshuflw + pshufhw instructions. */
32975
32976 static bool
32977 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
32978 {
32979 unsigned char perm2[MAX_VECT_LEN];
32980 unsigned i;
32981 bool ok;
32982
32983 if (d->vmode != V8HImode || d->op0 != d->op1)
32984 return false;
32985
32986 /* The two permutations only operate in 64-bit lanes. */
32987 for (i = 0; i < 4; ++i)
32988 if (d->perm[i] >= 4)
32989 return false;
32990 for (i = 4; i < 8; ++i)
32991 if (d->perm[i] < 4)
32992 return false;
32993
32994 if (d->testing_p)
32995 return true;
32996
32997 /* Emit the pshuflw. */
32998 memcpy (perm2, d->perm, 4);
32999 for (i = 4; i < 8; ++i)
33000 perm2[i] = i;
33001 ok = expand_vselect (d->target, d->op0, perm2, 8);
33002 gcc_assert (ok);
33003
33004 /* Emit the pshufhw. */
33005 memcpy (perm2 + 4, d->perm + 4, 4);
33006 for (i = 0; i < 4; ++i)
33007 perm2[i] = i;
33008 ok = expand_vselect (d->target, d->target, perm2, 8);
33009 gcc_assert (ok);
33010
33011 return true;
33012 }
33013
33014 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33015 the permutation using the SSSE3 palignr instruction. This succeeds
33016 when all of the elements in PERM fit within one vector and we merely
33017 need to shift them down so that a single vector permutation has a
33018 chance to succeed. */
33019
33020 static bool
33021 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33022 {
33023 unsigned i, nelt = d->nelt;
33024 unsigned min, max;
33025 bool in_order, ok;
33026 rtx shift;
33027
33028 /* Even with AVX, palignr only operates on 128-bit vectors. */
33029 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33030 return false;
33031
33032 min = nelt, max = 0;
33033 for (i = 0; i < nelt; ++i)
33034 {
33035 unsigned e = d->perm[i];
33036 if (e < min)
33037 min = e;
33038 if (e > max)
33039 max = e;
33040 }
33041 if (min == 0 || max - min >= nelt)
33042 return false;
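  /* E.g. for V8HImode with d->perm = { 3, 4, 5, 6, 7, 8, 9, 10 } we get
     min = 3 and max = 10, so all referenced elements fit in one
     vector-sized window; the palignr below shifts the op1:op0
     concatenation right by 3 elements and the residual permutation
     becomes the identity.  */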
33043
33044 /* Given that we have SSSE3, we know we'll be able to implement the
33045 single operand permutation after the palignr with pshufb. */
33046 if (d->testing_p)
33047 return true;
33048
33049 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33050 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33051 gen_lowpart (TImode, d->op1),
33052 gen_lowpart (TImode, d->op0), shift));
33053
33054 d->op0 = d->op1 = d->target;
33055
33056 in_order = true;
33057 for (i = 0; i < nelt; ++i)
33058 {
33059 unsigned e = d->perm[i] - min;
33060 if (e != i)
33061 in_order = false;
33062 d->perm[i] = e;
33063 }
33064
33065 /* Test for the degenerate case where the alignment by itself
33066 produces the desired permutation. */
33067 if (in_order)
33068 return true;
33069
33070 ok = expand_vec_perm_1 (d);
33071 gcc_assert (ok);
33072
33073 return ok;
33074 }
33075
33076 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33077 a two vector permutation into a single vector permutation by using
33078 an interleave operation to merge the vectors. */
33079
33080 static bool
33081 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33082 {
33083 struct expand_vec_perm_d dremap, dfinal;
33084 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33085 unsigned contents, h1, h2, h3, h4;
33086 unsigned char remap[2 * MAX_VECT_LEN];
33087 rtx seq;
33088 bool ok;
33089
33090 if (d->op0 == d->op1)
33091 return false;
33092
33093 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33094 lanes. We can use similar techniques with the vperm2f128 instruction,
33095 but it requires slightly different logic. */
33096 if (GET_MODE_SIZE (d->vmode) != 16)
33097 return false;
33098
33099 /* Examine from whence the elements come. */
33100 contents = 0;
33101 for (i = 0; i < nelt; ++i)
33102 contents |= 1u << d->perm[i];
33103
33104 /* Split the two input vectors into 4 halves. */
33105 h1 = (1u << nelt2) - 1;
33106 h2 = h1 << nelt2;
33107 h3 = h2 << nelt2;
33108 h4 = h3 << nelt2;
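  /* For example, with nelt = 4 (V4SFmode) the half masks are h1 = 0x03,
     h2 = 0x0c, h3 = 0x30 and h4 = 0xc0; bit i of CONTENTS is set when
     element i of the op0/op1 concatenation is referenced, so the tests
     below check which pair of halves supplies all referenced elements.  */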
33109
33110 memset (remap, 0xff, sizeof (remap));
33111 dremap = *d;
33112
33113 /* If all of the elements come from the low halves, use interleave low;
33114 similarly for interleave high. If the elements come from mis-matched
33115 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
33116 if ((contents & (h1 | h3)) == contents)
33117 {
33118 for (i = 0; i < nelt2; ++i)
33119 {
33120 remap[i] = i * 2;
33121 remap[i + nelt] = i * 2 + 1;
33122 dremap.perm[i * 2] = i;
33123 dremap.perm[i * 2 + 1] = i + nelt;
33124 }
33125 }
33126 else if ((contents & (h2 | h4)) == contents)
33127 {
33128 for (i = 0; i < nelt2; ++i)
33129 {
33130 remap[i + nelt2] = i * 2;
33131 remap[i + nelt + nelt2] = i * 2 + 1;
33132 dremap.perm[i * 2] = i + nelt2;
33133 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33134 }
33135 }
33136 else if ((contents & (h1 | h4)) == contents)
33137 {
33138 for (i = 0; i < nelt2; ++i)
33139 {
33140 remap[i] = i;
33141 remap[i + nelt + nelt2] = i + nelt2;
33142 dremap.perm[i] = i;
33143 dremap.perm[i + nelt2] = i + nelt + nelt2;
33144 }
33145 if (nelt != 4)
33146 {
33147 dremap.vmode = V2DImode;
33148 dremap.nelt = 2;
33149 dremap.perm[0] = 0;
33150 dremap.perm[1] = 3;
33151 }
33152 }
33153 else if ((contents & (h2 | h3)) == contents)
33154 {
33155 for (i = 0; i < nelt2; ++i)
33156 {
33157 remap[i + nelt2] = i;
33158 remap[i + nelt] = i + nelt2;
33159 dremap.perm[i] = i + nelt2;
33160 dremap.perm[i + nelt2] = i + nelt;
33161 }
33162 if (nelt != 4)
33163 {
33164 dremap.vmode = V2DImode;
33165 dremap.nelt = 2;
33166 dremap.perm[0] = 1;
33167 dremap.perm[1] = 2;
33168 }
33169 }
33170 else
33171 return false;
33172
33173 /* Use the remapping array set up above to move the elements from their
33174 swizzled locations into their final destinations. */
33175 dfinal = *d;
33176 for (i = 0; i < nelt; ++i)
33177 {
33178 unsigned e = remap[d->perm[i]];
33179 gcc_assert (e < nelt);
33180 dfinal.perm[i] = e;
33181 }
33182 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33183 dfinal.op1 = dfinal.op0;
33184 dremap.target = dfinal.op0;
33185
33186 /* Test if the final remap can be done with a single insn. For V4SFmode or
33187 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33188 start_sequence ();
33189 ok = expand_vec_perm_1 (&dfinal);
33190 seq = get_insns ();
33191 end_sequence ();
33192
33193 if (!ok)
33194 return false;
33195
33196 if (dremap.vmode != dfinal.vmode)
33197 {
33198 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33199 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33200 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33201 }
33202
33203 ok = expand_vec_perm_1 (&dremap);
33204 gcc_assert (ok);
33205
33206 emit_insn (seq);
33207 return true;
33208 }
33209
33210 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33211 permutation with two pshufb insns and an ior. We should have already
33212 failed all two instruction sequences. */
33213
33214 static bool
33215 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33216 {
33217 rtx rperm[2][16], vperm, l, h, op, m128;
33218 unsigned int i, nelt, eltsz;
33219
33220 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33221 return false;
33222 gcc_assert (d->op0 != d->op1);
33223
33224 nelt = d->nelt;
33225 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33226
33227 /* Generate two permutation masks. If the required element is within
33228 the given vector it is shuffled into the proper lane. If the required
33229 element is in the other vector, force a zero into the lane by setting
33230 bit 7 in the permutation mask. */
33231 m128 = GEN_INT (-128);
33232 for (i = 0; i < nelt; ++i)
33233 {
33234 unsigned j, e = d->perm[i];
33235 unsigned which = (e >= nelt);
33236 if (e >= nelt)
33237 e -= nelt;
33238
33239 for (j = 0; j < eltsz; ++j)
33240 {
33241 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33242 rperm[1-which][i*eltsz + j] = m128;
33243 }
33244 }
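  /* E.g. extracting the even elements of two V8HImode vectors
     (d->perm = { 0, 2, 4, 6, 8, 10, 12, 14 }, eltsz = 2): the first mask
     selects bytes { 0,1, 4,5, 8,9, 12,13 } of op0 and zeroes the upper
     half of the result, the second mask does the same for op1 into the
     upper half, and the final ior combines the two partial results.  */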
33245
33246 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33247 vperm = force_reg (V16QImode, vperm);
33248
33249 l = gen_reg_rtx (V16QImode);
33250 op = gen_lowpart (V16QImode, d->op0);
33251 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33252
33253 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33254 vperm = force_reg (V16QImode, vperm);
33255
33256 h = gen_reg_rtx (V16QImode);
33257 op = gen_lowpart (V16QImode, d->op1);
33258 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33259
33260 op = gen_lowpart (V16QImode, d->target);
33261 emit_insn (gen_iorv16qi3 (op, l, h));
33262
33263 return true;
33264 }
33265
33266 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33267 and extract-odd permutations. */
33268
33269 static bool
33270 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33271 {
33272 rtx t1, t2, t3;
33273
33274 switch (d->vmode)
33275 {
33276 case V4DFmode:
33277 t1 = gen_reg_rtx (V4DFmode);
33278 t2 = gen_reg_rtx (V4DFmode);
33279
33280 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33281 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33282 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33283
33284 /* Now an unpck[lh]pd will produce the result required. */
33285 if (odd)
33286 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33287 else
33288 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33289 emit_insn (t3);
33290 break;
33291
33292 case V8SFmode:
33293 {
33294 int mask = odd ? 0xdd : 0x88;
33295
33296 t1 = gen_reg_rtx (V8SFmode);
33297 t2 = gen_reg_rtx (V8SFmode);
33298 t3 = gen_reg_rtx (V8SFmode);
33299
33300 /* Shuffle within the 128-bit lanes to produce:
33301 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33302 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33303 GEN_INT (mask)));
33304
33305 /* Shuffle the lanes around to produce:
33306 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33307 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33308 GEN_INT (0x3)));
33309
33310 /* Shuffle within the 128-bit lanes to produce:
33311 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33312 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33313
33314 /* Shuffle within the 128-bit lanes to produce:
33315 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33316 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33317
33318 /* Shuffle the lanes around to produce:
33319 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33320 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33321 GEN_INT (0x20)));
33322 }
33323 break;
33324
33325 case V2DFmode:
33326 case V4SFmode:
33327 case V2DImode:
33328 case V4SImode:
33329 /* These are always directly implementable by expand_vec_perm_1. */
33330 gcc_unreachable ();
33331
33332 case V8HImode:
33333 if (TARGET_SSSE3)
33334 return expand_vec_perm_pshufb2 (d);
33335 else
33336 {
33337 /* We need 2*log2(N)-1 operations to achieve odd/even
33338 with interleave. */
33339 t1 = gen_reg_rtx (V8HImode);
33340 t2 = gen_reg_rtx (V8HImode);
33341 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33342 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33343 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33344 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33345 if (odd)
33346 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33347 else
33348 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33349 emit_insn (t3);
33350 }
33351 break;
33352
33353 case V16QImode:
33354 if (TARGET_SSSE3)
33355 return expand_vec_perm_pshufb2 (d);
33356 else
33357 {
33358 t1 = gen_reg_rtx (V16QImode);
33359 t2 = gen_reg_rtx (V16QImode);
33360 t3 = gen_reg_rtx (V16QImode);
33361 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33362 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33363 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33364 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33365 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33366 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33367 if (odd)
33368 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33369 else
33370 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33371 emit_insn (t3);
33372 }
33373 break;
33374
33375 default:
33376 gcc_unreachable ();
33377 }
33378
33379 return true;
33380 }
33381
33382 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33383 extract-even and extract-odd permutations. */
33384
33385 static bool
33386 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33387 {
33388 unsigned i, odd, nelt = d->nelt;
33389
33390 odd = d->perm[0];
33391 if (odd != 0 && odd != 1)
33392 return false;
33393
33394 for (i = 1; i < nelt; ++i)
33395 if (d->perm[i] != 2 * i + odd)
33396 return false;
33397
33398 return expand_vec_perm_even_odd_1 (d, odd);
33399 }
33400
33401 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33402 permutations. We assume that expand_vec_perm_1 has already failed. */
33403
33404 static bool
33405 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33406 {
33407 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33408 enum machine_mode vmode = d->vmode;
33409 unsigned char perm2[4];
33410 rtx op0 = d->op0;
33411 bool ok;
33412
33413 switch (vmode)
33414 {
33415 case V4DFmode:
33416 case V8SFmode:
33417 /* These are special-cased in sse.md so that we can optionally
33418 use the vbroadcast instruction. They expand to two insns
33419 if the input happens to be in a register. */
33420 gcc_unreachable ();
33421
33422 case V2DFmode:
33423 case V2DImode:
33424 case V4SFmode:
33425 case V4SImode:
33426 /* These are always implementable using standard shuffle patterns. */
33427 gcc_unreachable ();
33428
33429 case V8HImode:
33430 case V16QImode:
33431 /* These can be implemented via interleave. We save one insn by
33432 stopping once we have promoted to V4SImode and then use pshufd. */
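      /* E.g. broadcasting element 5 of a V16QImode vector: interleave-low
         pairs up the bytes ({ b0,b0,b1,b1,... }); viewed as V8HImode the
         wanted byte now lives in word 5, so the next step interleaves high
         and leaves it in dword 1 of a V4SImode value; the final pshufd
         with { 1,1,1,1 } replicates that dword.  */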
33433 do
33434 {
33435 optab otab = vec_interleave_low_optab;
33436
33437 if (elt >= nelt2)
33438 {
33439 otab = vec_interleave_high_optab;
33440 elt -= nelt2;
33441 }
33442 nelt2 /= 2;
33443
33444 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33445 vmode = get_mode_wider_vector (vmode);
33446 op0 = gen_lowpart (vmode, op0);
33447 }
33448 while (vmode != V4SImode);
33449
33450 memset (perm2, elt, 4);
33451 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33452 gcc_assert (ok);
33453 return true;
33454
33455 default:
33456 gcc_unreachable ();
33457 }
33458 }
33459
33460 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33461 broadcast permutations. */
33462
33463 static bool
33464 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33465 {
33466 unsigned i, elt, nelt = d->nelt;
33467
33468 if (d->op0 != d->op1)
33469 return false;
33470
33471 elt = d->perm[0];
33472 for (i = 1; i < nelt; ++i)
33473 if (d->perm[i] != elt)
33474 return false;
33475
33476 return expand_vec_perm_broadcast_1 (d);
33477 }
33478
33479 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33480 With all of the interface bits taken care of, perform the expansion
33481 in D and return true on success. */
33482
33483 static bool
33484 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33485 {
33486 /* Try a single instruction expansion. */
33487 if (expand_vec_perm_1 (d))
33488 return true;
33489
33490 /* Try sequences of two instructions. */
33491
33492 if (expand_vec_perm_pshuflw_pshufhw (d))
33493 return true;
33494
33495 if (expand_vec_perm_palignr (d))
33496 return true;
33497
33498 if (expand_vec_perm_interleave2 (d))
33499 return true;
33500
33501 if (expand_vec_perm_broadcast (d))
33502 return true;
33503
33504 /* Try sequences of three instructions. */
33505
33506 if (expand_vec_perm_pshufb2 (d))
33507 return true;
33508
33509 /* ??? Look for narrow permutations whose element orderings would
33510 allow the promotion to a wider mode. */
33511
33512 /* ??? Look for sequences of interleave or a wider permute that place
33513 the data into the correct lanes for a half-vector shuffle like
33514 pshuf[lh]w or vpermilps. */
33515
33516 /* ??? Look for sequences of interleave that produce the desired results.
33517 The combinatorics of punpck[lh] get pretty ugly... */
33518
33519 if (expand_vec_perm_even_odd (d))
33520 return true;
33521
33522 return false;
33523 }
33524
33525 /* Extract the values from the vector CST into the permutation array in D.
33526 Return 0 on error, 1 if all values from the permutation come from the
33527 first vector, 2 if all values from the second vector, and 3 otherwise. */
33528
33529 static int
33530 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33531 {
33532 tree list = TREE_VECTOR_CST_ELTS (cst);
33533 unsigned i, nelt = d->nelt;
33534 int ret = 0;
33535
33536 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33537 {
33538 unsigned HOST_WIDE_INT e;
33539
33540 if (!host_integerp (TREE_VALUE (list), 1))
33541 return 0;
33542 e = tree_low_cst (TREE_VALUE (list), 1);
33543 if (e >= 2 * nelt)
33544 return 0;
33545
33546 ret |= (e < nelt ? 1 : 2);
33547 d->perm[i] = e;
33548 }
33549 gcc_assert (list == NULL);
33550
33551 /* If all elements come from the second vector, fold them to refer to the first. */
33552 if (ret == 2)
33553 for (i = 0; i < nelt; ++i)
33554 d->perm[i] -= nelt;
33555
33556 return ret;
33557 }
33558
33559 static rtx
33560 ix86_expand_vec_perm_builtin (tree exp)
33561 {
33562 struct expand_vec_perm_d d;
33563 tree arg0, arg1, arg2;
33564
33565 arg0 = CALL_EXPR_ARG (exp, 0);
33566 arg1 = CALL_EXPR_ARG (exp, 1);
33567 arg2 = CALL_EXPR_ARG (exp, 2);
33568
33569 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33570 d.nelt = GET_MODE_NUNITS (d.vmode);
33571 d.testing_p = false;
33572 gcc_assert (VECTOR_MODE_P (d.vmode));
33573
33574 if (TREE_CODE (arg2) != VECTOR_CST)
33575 {
33576 error_at (EXPR_LOCATION (exp),
33577 "vector permutation requires vector constant");
33578 goto exit_error;
33579 }
33580
33581 switch (extract_vec_perm_cst (&d, arg2))
33582 {
33583 default:
33584 gcc_unreachable();
33585
33586 case 0:
33587 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33588 goto exit_error;
33589
33590 case 3:
33591 if (!operand_equal_p (arg0, arg1, 0))
33592 {
33593 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33594 d.op0 = force_reg (d.vmode, d.op0);
33595 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33596 d.op1 = force_reg (d.vmode, d.op1);
33597 break;
33598 }
33599
33600 /* The elements of PERM do not suggest that only the first operand
33601 is used, but both operands are identical. Allow easier matching
33602 of the permutation by folding the permutation into the single
33603 input vector. */
33604 {
33605 unsigned i, nelt = d.nelt;
33606 for (i = 0; i < nelt; ++i)
33607 if (d.perm[i] >= nelt)
33608 d.perm[i] -= nelt;
33609 }
33610 /* FALLTHRU */
33611
33612 case 1:
33613 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33614 d.op0 = force_reg (d.vmode, d.op0);
33615 d.op1 = d.op0;
33616 break;
33617
33618 case 2:
33619 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33620 d.op0 = force_reg (d.vmode, d.op0);
33621 d.op1 = d.op0;
33622 break;
33623 }
33624
33625 d.target = gen_reg_rtx (d.vmode);
33626 if (ix86_expand_vec_perm_builtin_1 (&d))
33627 return d.target;
33628
33629 /* For compiler-generated permutations, we should never get here, because
33630 the compiler should also be checking the ok hook. But since this is a
33631 builtin the user has access to, don't abort. */
33632 switch (d.nelt)
33633 {
33634 case 2:
33635 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33636 break;
33637 case 4:
33638 sorry ("vector permutation (%d %d %d %d)",
33639 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33640 break;
33641 case 8:
33642 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33643 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33644 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33645 break;
33646 case 16:
33647 sorry ("vector permutation "
33648 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33649 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33650 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33651 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33652 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33653 break;
33654 default:
33655 gcc_unreachable ();
33656 }
33657 exit_error:
33658 return CONST0_RTX (d.vmode);
33659 }
33660
33661 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33662
33663 static bool
33664 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33665 {
33666 struct expand_vec_perm_d d;
33667 int vec_mask;
33668 bool ret, one_vec;
33669
33670 d.vmode = TYPE_MODE (vec_type);
33671 d.nelt = GET_MODE_NUNITS (d.vmode);
33672 d.testing_p = true;
33673
33674 /* Given sufficient ISA support we can just return true here
33675 for selected vector modes. */
33676 if (GET_MODE_SIZE (d.vmode) == 16)
33677 {
33678 /* All implementable with a single vpperm insn. */
33679 if (TARGET_XOP)
33680 return true;
33681 /* All implementable with 2 pshufb + 1 ior. */
33682 if (TARGET_SSSE3)
33683 return true;
33684 /* All implementable with shufpd or unpck[lh]pd. */
33685 if (d.nelt == 2)
33686 return true;
33687 }
33688
33689 vec_mask = extract_vec_perm_cst (&d, mask);
33690
33691 /* This hook cannot be called in response to something that the
33692 user does (unlike the builtin expander), so we shouldn't ever see
33693 an error generated from the extract. */
33694 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33695 one_vec = (vec_mask != 3);
33696
33697 /* Implementable with shufps or pshufd. */
33698 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33699 return true;
33700
33701 /* Otherwise we have to go through the motions and see if we can
33702 figure out how to generate the requested permutation. */
33703 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33704 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33705 if (!one_vec)
33706 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33707
33708 start_sequence ();
33709 ret = ix86_expand_vec_perm_builtin_1 (&d);
33710 end_sequence ();
33711
33712 return ret;
33713 }
33714
33715 void
33716 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
33717 {
33718 struct expand_vec_perm_d d;
33719 unsigned i, nelt;
33720
33721 d.target = targ;
33722 d.op0 = op0;
33723 d.op1 = op1;
33724 d.vmode = GET_MODE (targ);
33725 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
33726 d.testing_p = false;
33727
33728 for (i = 0; i < nelt; ++i)
33729 d.perm[i] = i * 2 + odd;
33730
33731 /* We'll either be able to implement the permutation directly... */
33732 if (expand_vec_perm_1 (&d))
33733 return;
33734
33735 /* ... or we use the special-case patterns. */
33736 expand_vec_perm_even_odd_1 (&d, odd);
33737 }
33738
33739 /* Expand an insert into a vector register through pinsr insn.
33740 Return true if successful. */
33741
33742 bool
33743 ix86_expand_pinsr (rtx *operands)
33744 {
33745 rtx dst = operands[0];
33746 rtx src = operands[3];
33747
33748 unsigned int size = INTVAL (operands[1]);
33749 unsigned int pos = INTVAL (operands[2]);
33750
33751 if (GET_CODE (dst) == SUBREG)
33752 {
33753 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
33754 dst = SUBREG_REG (dst);
33755 }
33756
33757 if (GET_CODE (src) == SUBREG)
33758 src = SUBREG_REG (src);
33759
33760 switch (GET_MODE (dst))
33761 {
33762 case V16QImode:
33763 case V8HImode:
33764 case V4SImode:
33765 case V2DImode:
33766 {
33767 enum machine_mode srcmode, dstmode;
33768 rtx (*pinsr)(rtx, rtx, rtx, rtx);
33769
33770 srcmode = mode_for_size (size, MODE_INT, 0);
33771
33772 switch (srcmode)
33773 {
33774 case QImode:
33775 if (!TARGET_SSE4_1)
33776 return false;
33777 dstmode = V16QImode;
33778 pinsr = gen_sse4_1_pinsrb;
33779 break;
33780
33781 case HImode:
33782 if (!TARGET_SSE2)
33783 return false;
33784 dstmode = V8HImode;
33785 pinsr = gen_sse2_pinsrw;
33786 break;
33787
33788 case SImode:
33789 if (!TARGET_SSE4_1)
33790 return false;
33791 dstmode = V4SImode;
33792 pinsr = gen_sse4_1_pinsrd;
33793 break;
33794
33795 case DImode:
33796 gcc_assert (TARGET_64BIT);
33797 if (!TARGET_SSE4_1)
33798 return false;
33799 dstmode = V2DImode;
33800 pinsr = gen_sse4_1_pinsrq;
33801 break;
33802
33803 default:
33804 return false;
33805 }
33806
33807 dst = gen_lowpart (dstmode, dst);
33808 src = gen_lowpart (srcmode, src);
33809
33810 pos /= size;
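        /* POS arrives as a bit position; dividing by the element size turns
           it into an element index, and 1 << pos below forms the
           single-element selector passed to the pinsr generator.  E.g.
           inserting a HImode value at bit position 48 of a V8HImode
           destination gives pos = 3 and a selector of 8.  */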
33811
33812 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
33813 return true;
33814 }
33815
33816 default:
33817 return false;
33818 }
33819 }
33820 \f
33821 /* This function returns the calling-ABI-specific va_list type node,
33822 i.e. the va_list type appropriate for FNDECL. */
33823
33824 static tree
33825 ix86_fn_abi_va_list (tree fndecl)
33826 {
33827 if (!TARGET_64BIT)
33828 return va_list_type_node;
33829 gcc_assert (fndecl != NULL_TREE);
33830
33831 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
33832 return ms_va_list_type_node;
33833 else
33834 return sysv_va_list_type_node;
33835 }
33836
33837 /* Returns the canonical va_list type specified by TYPE. If there
33838 is no valid TYPE provided, it returns NULL_TREE. */
33839
33840 static tree
33841 ix86_canonical_va_list_type (tree type)
33842 {
33843 tree wtype, htype;
33844
33845 /* Resolve references and pointers to va_list type. */
33846 if (TREE_CODE (type) == MEM_REF)
33847 type = TREE_TYPE (type);
33848 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
33849 type = TREE_TYPE (type);
33850 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
33851 type = TREE_TYPE (type);
33852
33853 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
33854 {
33855 wtype = va_list_type_node;
33856 gcc_assert (wtype != NULL_TREE);
33857 htype = type;
33858 if (TREE_CODE (wtype) == ARRAY_TYPE)
33859 {
33860 /* If va_list is an array type, the argument may have decayed
33861 to a pointer type, e.g. by being passed to another function.
33862 In that case, unwrap both types so that we can compare the
33863 underlying records. */
33864 if (TREE_CODE (htype) == ARRAY_TYPE
33865 || POINTER_TYPE_P (htype))
33866 {
33867 wtype = TREE_TYPE (wtype);
33868 htype = TREE_TYPE (htype);
33869 }
33870 }
33871 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33872 return va_list_type_node;
33873 wtype = sysv_va_list_type_node;
33874 gcc_assert (wtype != NULL_TREE);
33875 htype = type;
33876 if (TREE_CODE (wtype) == ARRAY_TYPE)
33877 {
33878 /* If va_list is an array type, the argument may have decayed
33879 to a pointer type, e.g. by being passed to another function.
33880 In that case, unwrap both types so that we can compare the
33881 underlying records. */
33882 if (TREE_CODE (htype) == ARRAY_TYPE
33883 || POINTER_TYPE_P (htype))
33884 {
33885 wtype = TREE_TYPE (wtype);
33886 htype = TREE_TYPE (htype);
33887 }
33888 }
33889 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33890 return sysv_va_list_type_node;
33891 wtype = ms_va_list_type_node;
33892 gcc_assert (wtype != NULL_TREE);
33893 htype = type;
33894 if (TREE_CODE (wtype) == ARRAY_TYPE)
33895 {
33896 /* If va_list is an array type, the argument may have decayed
33897 to a pointer type, e.g. by being passed to another function.
33898 In that case, unwrap both types so that we can compare the
33899 underlying records. */
33900 if (TREE_CODE (htype) == ARRAY_TYPE
33901 || POINTER_TYPE_P (htype))
33902 {
33903 wtype = TREE_TYPE (wtype);
33904 htype = TREE_TYPE (htype);
33905 }
33906 }
33907 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33908 return ms_va_list_type_node;
33909 return NULL_TREE;
33910 }
33911 return std_canonical_va_list_type (type);
33912 }
33913
33914 /* Iterate through the target-specific builtin types for va_list.
33915 IDX denotes the iterator, *PTREE is set to the result type of
33916 the va_list builtin, and *PNAME to its internal type.
33917 Returns zero if there is no element for this index, otherwise
33918 IDX should be increased upon the next call.
33919 Note, do not iterate a base builtin's name like __builtin_va_list.
33920 Used from c_common_nodes_and_builtins. */
33921
33922 static int
33923 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
33924 {
33925 if (TARGET_64BIT)
33926 {
33927 switch (idx)
33928 {
33929 default:
33930 break;
33931
33932 case 0:
33933 *ptree = ms_va_list_type_node;
33934 *pname = "__builtin_ms_va_list";
33935 return 1;
33936
33937 case 1:
33938 *ptree = sysv_va_list_type_node;
33939 *pname = "__builtin_sysv_va_list";
33940 return 1;
33941 }
33942 }
33943
33944 return 0;
33945 }
33946
33947 #undef TARGET_SCHED_DISPATCH
33948 #define TARGET_SCHED_DISPATCH has_dispatch
33949 #undef TARGET_SCHED_DISPATCH_DO
33950 #define TARGET_SCHED_DISPATCH_DO do_dispatch
33951
33952 /* The size of the dispatch window is the total number of bytes of
33953 object code allowed in a window. */
33954 #define DISPATCH_WINDOW_SIZE 16
33955
33956 /* Number of dispatch windows considered for scheduling. */
33957 #define MAX_DISPATCH_WINDOWS 3
33958
33959 /* Maximum number of instructions in a window. */
33960 #define MAX_INSN 4
33961
33962 /* Maximum number of immediate operands in a window. */
33963 #define MAX_IMM 4
33964
33965 /* Maximum number of immediate bits allowed in a window. */
33966 #define MAX_IMM_SIZE 128
33967
33968 /* Maximum number of 32 bit immediates allowed in a window. */
33969 #define MAX_IMM_32 4
33970
33971 /* Maximum number of 64 bit immediates allowed in a window. */
33972 #define MAX_IMM_64 2
33973
33974 /* Maximum total of loads or prefetches allowed in a window. */
33975 #define MAX_LOAD 2
33976
33977 /* Maximum total of stores allowed in a window. */
33978 #define MAX_STORE 1
33979
33980 #undef BIG
33981 #define BIG 100
33982
33983
33984 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
33985 enum dispatch_group {
33986 disp_no_group = 0,
33987 disp_load,
33988 disp_store,
33989 disp_load_store,
33990 disp_prefetch,
33991 disp_imm,
33992 disp_imm_32,
33993 disp_imm_64,
33994 disp_branch,
33995 disp_cmp,
33996 disp_jcc,
33997 disp_last
33998 };
33999
34000 /* Number of allowable groups in a dispatch window. It is an array
34001 indexed by the dispatch_group enum. 100 is used as a big number,
34002 because the number of these kinds of operations does not have any
34003 effect on the dispatch window, but we still need entries for them in
34004 the table. */
34005 static unsigned int num_allowable_groups[disp_last] = {
34006 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
34007 };
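/* Reading the table above together with the enum: at most 2 loads,
   1 store, 1 load-store, 2 prefetches, 4 immediates (of which at most
   4 may be 32-bit and at most 2 may be 64-bit) and 1 branch per window;
   compares and conditional jumps get BIG because their count alone does
   not constrain the window.  */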
34008
34009 char group_name[disp_last + 1][16] = {
34010 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
34011 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
34012 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
34013 };
34014
34015 /* Instruction path. */
34016 enum insn_path {
34017 no_path = 0,
34018 path_single, /* Single micro op. */
34019 path_double, /* Double micro op. */
34020 path_multi, /* Instructions with more than 2 micro ops. */
34021 last_path
34022 };
34023
34024 /* sched_insn_info defines a window to the instructions scheduled in
34025 the basic block. It contains a pointer to the insn_info table and
34026 the instruction scheduled.
34027
34028 Windows are allocated for each basic block and are linked
34029 together. */
34030 typedef struct sched_insn_info_s {
34031 rtx insn;
34032 enum dispatch_group group;
34033 enum insn_path path;
34034 int byte_len;
34035 int imm_bytes;
34036 } sched_insn_info;
34037
34038 /* Linked list of dispatch windows. This is a two-way list of
34039 dispatch windows of a basic block. It contains information about
34040 the number of uops in the window and the total number of
34041 instructions and of bytes in the object code for this dispatch
34042 window. */
34043 typedef struct dispatch_windows_s {
34044 int num_insn; /* Number of insns in the window. */
34045 int num_uops; /* Number of uops in the window. */
34046 int window_size; /* Number of bytes in the window. */
34047 int window_num; /* Window number between 0 or 1. */
34048 int num_imm; /* Number of immediates in an insn. */
34049 int num_imm_32; /* Number of 32 bit immediates in an insn. */
34050 int num_imm_64; /* Number of 64 bit immediates in an insn. */
34051 int imm_size; /* Total immediates in the window. */
34052 int num_loads; /* Total memory loads in the window. */
34053 int num_stores; /* Total memory stores in the window. */
34054 int violation; /* Violation exists in window. */
34055 sched_insn_info *window; /* Pointer to the window. */
34056 struct dispatch_windows_s *next;
34057 struct dispatch_windows_s *prev;
34058 } dispatch_windows;
34059
34060 /* Immediate values used in an insn. */
34061 typedef struct imm_info_s
34062 {
34063 int imm;
34064 int imm32;
34065 int imm64;
34066 } imm_info;
34067
34068 static dispatch_windows *dispatch_window_list;
34069 static dispatch_windows *dispatch_window_list1;
34070
34071 /* Get the dispatch group of INSN with respect to memory operations. */
34072
34073 static enum dispatch_group
34074 get_mem_group (rtx insn)
34075 {
34076 enum attr_memory memory;
34077
34078 if (INSN_CODE (insn) < 0)
34079 return disp_no_group;
34080 memory = get_attr_memory (insn);
34081 if (memory == MEMORY_STORE)
34082 return disp_store;
34083
34084 if (memory == MEMORY_LOAD)
34085 return disp_load;
34086
34087 if (memory == MEMORY_BOTH)
34088 return disp_load_store;
34089
34090 return disp_no_group;
34091 }
34092
34093 /* Return true if insn is a compare instruction. */
34094
34095 static bool
34096 is_cmp (rtx insn)
34097 {
34098 enum attr_type type;
34099
34100 type = get_attr_type (insn);
34101 return (type == TYPE_TEST
34102 || type == TYPE_ICMP
34103 || type == TYPE_FCMP
34104 || GET_CODE (PATTERN (insn)) == COMPARE);
34105 }
34106
34107 /* Return true if a dispatch violation was encountered. */
34108
34109 static bool
34110 dispatch_violation (void)
34111 {
34112 if (dispatch_window_list->next)
34113 return dispatch_window_list->next->violation;
34114 return dispatch_window_list->violation;
34115 }
34116
34117 /* Return true if insn is a branch instruction. */
34118
34119 static bool
34120 is_branch (rtx insn)
34121 {
34122 return (CALL_P (insn) || JUMP_P (insn));
34123 }
34124
34125 /* Return true if insn is a prefetch instruction. */
34126
34127 static bool
34128 is_prefetch (rtx insn)
34129 {
34130 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
34131 }
34132
34133 /* This function initializes a dispatch window and the list container holding a
34134 pointer to the window. */
34135
34136 static void
34137 init_window (int window_num)
34138 {
34139 int i;
34140 dispatch_windows *new_list;
34141
34142 if (window_num == 0)
34143 new_list = dispatch_window_list;
34144 else
34145 new_list = dispatch_window_list1;
34146
34147 new_list->num_insn = 0;
34148 new_list->num_uops = 0;
34149 new_list->window_size = 0;
34150 new_list->next = NULL;
34151 new_list->prev = NULL;
34152 new_list->window_num = window_num;
34153 new_list->num_imm = 0;
34154 new_list->num_imm_32 = 0;
34155 new_list->num_imm_64 = 0;
34156 new_list->imm_size = 0;
34157 new_list->num_loads = 0;
34158 new_list->num_stores = 0;
34159 new_list->violation = false;
34160
34161 for (i = 0; i < MAX_INSN; i++)
34162 {
34163 new_list->window[i].insn = NULL;
34164 new_list->window[i].group = disp_no_group;
34165 new_list->window[i].path = no_path;
34166 new_list->window[i].byte_len = 0;
34167 new_list->window[i].imm_bytes = 0;
34168 }
34169 return;
34170 }
34171
34172 /* This function allocates and initializes a dispatch window and the
34173 list container holding a pointer to the window. */
34174
34175 static dispatch_windows *
34176 allocate_window (void)
34177 {
34178 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34179 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34180
34181 return new_list;
34182 }
34183
34184 /* This routine initializes the dispatch scheduling information. It
34185 initiates building dispatch scheduler tables and constructs the
34186 first dispatch window. */
34187
34188 static void
34189 init_dispatch_sched (void)
34190 {
34191 /* Allocate a dispatch list and a window. */
34192 dispatch_window_list = allocate_window ();
34193 dispatch_window_list1 = allocate_window ();
34194 init_window (0);
34195 init_window (1);
34196 }
34197
34198 /* This function returns true if a branch is detected. End of a basic block
34199 does not have to be a branch, but here we assume only branches end a
34200 window. */
34201
34202 static bool
34203 is_end_basic_block (enum dispatch_group group)
34204 {
34205 return group == disp_branch;
34206 }
34207
34208 /* This function is called when the end of window processing is reached. */
34209
34210 static void
34211 process_end_window (void)
34212 {
34213 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34214 if (dispatch_window_list->next)
34215 {
34216 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34217 gcc_assert (dispatch_window_list->window_size
34218 + dispatch_window_list1->window_size <= 48);
34219 init_window (1);
34220 }
34221 init_window (0);
34222 }
34223
34224 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
34225 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
34226 for 48 bytes of instructions. Note that these windows are not dispatch
34227 windows whose sizes are DISPATCH_WINDOW_SIZE. */
34228
34229 static dispatch_windows *
34230 allocate_next_window (int window_num)
34231 {
34232 if (window_num == 0)
34233 {
34234 if (dispatch_window_list->next)
34235 init_window (1);
34236 init_window (0);
34237 return dispatch_window_list;
34238 }
34239
34240 dispatch_window_list->next = dispatch_window_list1;
34241 dispatch_window_list1->prev = dispatch_window_list;
34242
34243 return dispatch_window_list1;
34244 }
34245
34246 /* Increment the number of immediate operands of an instruction. */
34247
34248 static int
34249 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34250 {
34251 if (*in_rtx == 0)
34252 return 0;
34253
34254 switch (GET_CODE (*in_rtx))
34255 {
34256 case CONST:
34257 case SYMBOL_REF:
34258 case CONST_INT:
34259 (imm_values->imm)++;
34260 if (x86_64_immediate_operand (*in_rtx, SImode))
34261 (imm_values->imm32)++;
34262 else
34263 (imm_values->imm64)++;
34264 break;
34265
34266 case CONST_DOUBLE:
34267 (imm_values->imm)++;
34268 (imm_values->imm64)++;
34269 break;
34270
34271 case CODE_LABEL:
34272 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34273 {
34274 (imm_values->imm)++;
34275 (imm_values->imm32)++;
34276 }
34277 break;
34278
34279 default:
34280 break;
34281 }
34282
34283 return 0;
34284 }
34285
34286 /* Compute number of immediate operands of an instruction. */
34287
34288 static void
34289 find_constant (rtx in_rtx, imm_info *imm_values)
34290 {
34291 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34292 (rtx_function) find_constant_1, (void *) imm_values);
34293 }
34294
34295 /* Return total size of immediate operands of an instruction along with number
34296 of corresponding immediate operands. It initializes its parameters to zero
34297 before calling FIND_CONSTANT.
34298 INSN is the input instruction. IMM is the total number of immediates.
34299 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
34300 bit immediates. */
34301
34302 static int
34303 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
34304 {
34305 imm_info imm_values = {0, 0, 0};
34306
34307 find_constant (insn, &imm_values);
34308 *imm = imm_values.imm;
34309 *imm32 = imm_values.imm32;
34310 *imm64 = imm_values.imm64;
34311 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
34312 }
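/* For illustration (a hypothetical instruction, not from the sources
   above): an insn such as "addl $100, %eax" carries a single immediate
   that satisfies x86_64_immediate_operand in SImode, so the call above
   would set *IMM = 1, *IMM32 = 1, *IMM64 = 0 and return
   1 * 4 + 0 * 8 = 4.  */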
34313
34314 /* This function returns true if INSN has at least one immediate
34315 operand. */
34316
34317 static bool
34318 has_immediate (rtx insn)
34319 {
34320 int num_imm_operand;
34321 int num_imm32_operand;
34322 int num_imm64_operand;
34323
34324 if (insn)
34325 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34326 &num_imm64_operand);
34327 return false;
34328 }
34329
34330 /* Return the decode path (single, double or multi uop) of INSN. */
34331
34332 static enum insn_path
34333 get_insn_path (rtx insn)
34334 {
34335 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
34336
34337 if ((int)path == 0)
34338 return path_single;
34339
34340 if ((int)path == 1)
34341 return path_double;
34342
34343 return path_multi;
34344 }
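/* Note (added): the integer comparisons above rely on the ordering of
   the amdfam10_decode attribute values; value 0 is treated as a
   single-uop path, value 1 as a double-uop path, and anything else as
   a multi-uop path.  */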
34345
34346 /* Return insn dispatch group. */
34347
34348 static enum dispatch_group
34349 get_insn_group (rtx insn)
34350 {
34351 enum dispatch_group group = get_mem_group (insn);
34352 if (group)
34353 return group;
34354
34355 if (is_branch (insn))
34356 return disp_branch;
34357
34358 if (is_cmp (insn))
34359 return disp_cmp;
34360
34361 if (has_immediate (insn))
34362 return disp_imm;
34363
34364 if (is_prefetch (insn))
34365 return disp_prefetch;
34366
34367 return disp_no_group;
34368 }
34369
34370 /* Count number of GROUP restricted instructions in a dispatch
34371 window WINDOW_LIST. */
34372
34373 static int
34374 count_num_restricted (rtx insn, dispatch_windows *window_list)
34375 {
34376 enum dispatch_group group = get_insn_group (insn);
34377 int imm_size;
34378 int num_imm_operand;
34379 int num_imm32_operand;
34380 int num_imm64_operand;
34381
34382 if (group == disp_no_group)
34383 return 0;
34384
34385 if (group == disp_imm)
34386 {
34387 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34388 &num_imm64_operand);
34389 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34390 || num_imm_operand + window_list->num_imm > MAX_IMM
34391 || (num_imm32_operand > 0
34392 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34393 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34394 || (num_imm64_operand > 0
34395 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34396 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34397 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34398 && num_imm64_operand > 0
34399 && ((window_list->num_imm_64 > 0
34400 && window_list->num_insn >= 2)
34401 || window_list->num_insn >= 3)))
34402 return BIG;
34403
34404 return 1;
34405 }
34406
34407 if ((group == disp_load_store
34408 && (window_list->num_loads >= MAX_LOAD
34409 || window_list->num_stores >= MAX_STORE))
34410 || ((group == disp_load
34411 || group == disp_prefetch)
34412 && window_list->num_loads >= MAX_LOAD)
34413 || (group == disp_store
34414 && window_list->num_stores >= MAX_STORE))
34415 return BIG;
34416
34417 return 1;
34418 }
34419
34420 /* This function returns true if insn satisfies dispatch rules on the
34421 last window scheduled. */
34422
34423 static bool
34424 fits_dispatch_window (rtx insn)
34425 {
34426 dispatch_windows *window_list = dispatch_window_list;
34427 dispatch_windows *window_list_next = dispatch_window_list->next;
34428 unsigned int num_restrict;
34429 enum dispatch_group group = get_insn_group (insn);
34430 enum insn_path path = get_insn_path (insn);
34431 int sum;
34432
34433 /* Make disp_cmp and disp_jcc get scheduled as late as possible. These
34434 instructions should be given the lowest priority in the
34435 scheduling process in the Haifa scheduler to make sure they will be
34436 scheduled in the same dispatch window as the reference to them. */
34437 if (group == disp_jcc || group == disp_cmp)
34438 return false;
34439
34440 /* Check nonrestricted. */
34441 if (group == disp_no_group || group == disp_branch)
34442 return true;
34443
34444 /* Get last dispatch window. */
34445 if (window_list_next)
34446 window_list = window_list_next;
34447
34448 if (window_list->window_num == 1)
34449 {
34450 sum = window_list->prev->window_size + window_list->window_size;
34451
34452 if (sum == 32
34453 || (min_insn_size (insn) + sum) >= 48)
34454 /* Window 1 is full. Go for next window. */
34455 return true;
34456 }
34457
34458 num_restrict = count_num_restricted (insn, window_list);
34459
34460 if (num_restrict > num_allowable_groups[group])
34461 return false;
34462
34463 /* See if it fits in the first window. */
34464 if (window_list->window_num == 0)
34465 {
34466 /* The first window should have only single and double path
34467 uops. */
34468 if (path == path_double
34469 && (window_list->num_uops + 2) > MAX_INSN)
34470 return false;
34471 else if (path != path_single)
34472 return false;
34473 }
34474 return true;
34475 }
34476
34477 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34478 dispatch window WINDOW_LIST. */
34479
34480 static void
34481 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
34482 {
34483 int byte_len = min_insn_size (insn);
34484 int num_insn = window_list->num_insn;
34485 int imm_size;
34486 sched_insn_info *window = window_list->window;
34487 enum dispatch_group group = get_insn_group (insn);
34488 enum insn_path path = get_insn_path (insn);
34489 int num_imm_operand;
34490 int num_imm32_operand;
34491 int num_imm64_operand;
34492
34493 if (!window_list->violation && group != disp_cmp
34494 && !fits_dispatch_window (insn))
34495 window_list->violation = true;
34496
34497 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34498 &num_imm64_operand);
34499
34500 /* Initialize window with new instruction. */
34501 window[num_insn].insn = insn;
34502 window[num_insn].byte_len = byte_len;
34503 window[num_insn].group = group;
34504 window[num_insn].path = path;
34505 window[num_insn].imm_bytes = imm_size;
34506
34507 window_list->window_size += byte_len;
34508 window_list->num_insn = num_insn + 1;
34509 window_list->num_uops = window_list->num_uops + num_uops;
34510 window_list->imm_size += imm_size;
34511 window_list->num_imm += num_imm_operand;
34512 window_list->num_imm_32 += num_imm32_operand;
34513 window_list->num_imm_64 += num_imm64_operand;
34514
34515 if (group == disp_store)
34516 window_list->num_stores += 1;
34517 else if (group == disp_load
34518 || group == disp_prefetch)
34519 window_list->num_loads += 1;
34520 else if (group == disp_load_store)
34521 {
34522 window_list->num_stores += 1;
34523 window_list->num_loads += 1;
34524 }
34525 }
34526
34527 /* Adds a scheduled instruction, INSN, to the current dispatch window.
34528 If the total bytes of instructions or the number of instructions in
34529 the window exceed the allowed limits, it allocates a new window. */
34530
34531 static void
34532 add_to_dispatch_window (rtx insn)
34533 {
34534 int byte_len;
34535 dispatch_windows *window_list;
34536 dispatch_windows *next_list;
34537 dispatch_windows *window0_list;
34538 enum insn_path path;
34539 enum dispatch_group insn_group;
34540 bool insn_fits;
34541 int num_insn;
34542 int num_uops;
34543 int window_num;
34544 int insn_num_uops;
34545 int sum;
34546
34547 if (INSN_CODE (insn) < 0)
34548 return;
34549
34550 byte_len = min_insn_size (insn);
34551 window_list = dispatch_window_list;
34552 next_list = window_list->next;
34553 path = get_insn_path (insn);
34554 insn_group = get_insn_group (insn);
34555
34556 /* Get the last dispatch window. */
34557 if (next_list)
34558 window_list = dispatch_window_list->next;
34559
34560 if (path == path_single)
34561 insn_num_uops = 1;
34562 else if (path == path_double)
34563 insn_num_uops = 2;
34564 else
34565 insn_num_uops = (int) path;
34566
34567 /* If the current window is full, get a new window.
34568 Window number zero is full if MAX_INSN uops are scheduled in it.
34569 Window number one is full if window zero's bytes plus window
34570 one's bytes is 32, or if adding the bytes of the new instruction
34571 makes the total reach 48 or more, or if it already has MAX_INSN
34572 instructions in it. */
34573 num_insn = window_list->num_insn;
34574 num_uops = window_list->num_uops;
34575 window_num = window_list->window_num;
34576 insn_fits = fits_dispatch_window (insn);
34577
34578 if (num_insn >= MAX_INSN
34579 || num_uops + insn_num_uops > MAX_INSN
34580 || !(insn_fits))
34581 {
34582 window_num = ~window_num & 1;
34583 window_list = allocate_next_window (window_num);
34584 }
34585
34586 if (window_num == 0)
34587 {
34588 add_insn_window (insn, window_list, insn_num_uops);
34589 if (window_list->num_insn >= MAX_INSN
34590 && insn_group == disp_branch)
34591 {
34592 process_end_window ();
34593 return;
34594 }
34595 }
34596 else if (window_num == 1)
34597 {
34598 window0_list = window_list->prev;
34599 sum = window0_list->window_size + window_list->window_size;
34600 if (sum == 32
34601 || (byte_len + sum) >= 48)
34602 {
34603 process_end_window ();
34604 window_list = dispatch_window_list;
34605 }
34606
34607 add_insn_window (insn, window_list, insn_num_uops);
34608 }
34609 else
34610 gcc_unreachable ();
34611
34612 if (is_end_basic_block (insn_group))
34613 {
34614 /* End of basic block is reached; do end-of-basic-block processing. */
34615 process_end_window ();
34616 return;
34617 }
34618 }
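/* Summary of the routine above (added note): the decode path is mapped
   to a uop count (single = 1, double = 2, otherwise the raw path
   value); the other window is selected once the current one has no room
   for more insns or uops or the insn does not fit its dispatch rules;
   and both windows are closed out when a branch ends the basic block.  */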
34619
34620 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34621
34622 DEBUG_FUNCTION static void
34623 debug_dispatch_window_file (FILE *file, int window_num)
34624 {
34625 dispatch_windows *list;
34626 int i;
34627
34628 if (window_num == 0)
34629 list = dispatch_window_list;
34630 else
34631 list = dispatch_window_list1;
34632
34633 fprintf (file, "Window #%d:\n", list->window_num);
34634 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
34635 list->num_insn, list->num_uops, list->window_size);
34636 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34637 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
34638
34639 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
34640 list->num_stores);
34641 fprintf (file, " insn info:\n");
34642
34643 for (i = 0; i < MAX_INSN; i++)
34644 {
34645 if (!list->window[i].insn)
34646 break;
34647 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
34648 i, group_name[list->window[i].group],
34649 i, (void *)list->window[i].insn,
34650 i, list->window[i].path,
34651 i, list->window[i].byte_len,
34652 i, list->window[i].imm_bytes);
34653 }
34654 }
34655
34656 /* Print to stdout a dispatch window. */
34657
34658 DEBUG_FUNCTION void
34659 debug_dispatch_window (int window_num)
34660 {
34661 debug_dispatch_window_file (stdout, window_num);
34662 }
34663
34664 /* Print INSN dispatch information to FILE. */
34665
34666 DEBUG_FUNCTION static void
34667 debug_insn_dispatch_info_file (FILE *file, rtx insn)
34668 {
34669 int byte_len;
34670 enum insn_path path;
34671 enum dispatch_group group;
34672 int imm_size;
34673 int num_imm_operand;
34674 int num_imm32_operand;
34675 int num_imm64_operand;
34676
34677 if (INSN_CODE (insn) < 0)
34678 return;
34679
34680 byte_len = min_insn_size (insn);
34681 path = get_insn_path (insn);
34682 group = get_insn_group (insn);
34683 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34684 &num_imm64_operand);
34685
34686 fprintf (file, " insn info:\n");
34687 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
34688 group_name[group], path, byte_len);
34689 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34690 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
34691 }
34692
34693 /* Print to STDERR the status of the ready list with respect to
34694 dispatch windows. */
34695
34696 DEBUG_FUNCTION void
34697 debug_ready_dispatch (void)
34698 {
34699 int i;
34700 int no_ready = number_in_ready ();
34701
34702 fprintf (stdout, "Number of ready: %d\n", no_ready);
34703
34704 for (i = 0; i < no_ready; i++)
34705 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
34706 }
34707
34708 /* This routine is the driver of the dispatch scheduler. */
34709
34710 static void
34711 do_dispatch (rtx insn, int mode)
34712 {
34713 if (mode == DISPATCH_INIT)
34714 init_dispatch_sched ();
34715 else if (mode == ADD_TO_DISPATCH_WINDOW)
34716 add_to_dispatch_window (insn);
34717 }
34718
34719 /* Return TRUE if Dispatch Scheduling is supported. */
34720
34721 static bool
34722 has_dispatch (rtx insn, int action)
34723 {
34724 if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
34725 && flag_dispatch_scheduler)
34726 switch (action)
34727 {
34728 default:
34729 return false;
34730
34731 case IS_DISPATCH_ON:
34732 return true;
34733 break;
34734
34735 case IS_CMP:
34736 return is_cmp (insn);
34737
34738 case DISPATCH_VIOLATION:
34739 return dispatch_violation ();
34740
34741 case FITS_DISPATCH_WINDOW:
34742 return fits_dispatch_window (insn);
34743 }
34744
34745 return false;
34746 }
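/* Usage note (added): this routine and do_dispatch above back the
   TARGET_SCHED_DISPATCH and TARGET_SCHED_DISPATCH_DO hooks defined
   earlier in this file, so the scheduler only consults the dispatch
   windows when tuning for bdver1/bdver2 and flag_dispatch_scheduler
   is set.  */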
34747
34748 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
34749 place emms and femms instructions. */
34750
34751 static enum machine_mode
34752 ix86_preferred_simd_mode (enum machine_mode mode)
34753 {
34754 if (!TARGET_SSE)
34755 return word_mode;
34756
34757 switch (mode)
34758 {
34759 case QImode:
34760 return V16QImode;
34761 case HImode:
34762 return V8HImode;
34763 case SImode:
34764 return V4SImode;
34765 case DImode:
34766 return V2DImode;
34767
34768 case SFmode:
34769 if (TARGET_AVX && !TARGET_PREFER_AVX128)
34770 return V8SFmode;
34771 else
34772 return V4SFmode;
34773
34774 case DFmode:
34775 if (!TARGET_VECTORIZE_DOUBLE)
34776 return word_mode;
34777 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
34778 return V4DFmode;
34779 else if (TARGET_SSE2)
34780 return V2DFmode;
34781 /* FALLTHRU */
34782
34783 default:
34784 return word_mode;
34785 }
34786 }
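/* For example, the mapping above yields V4SImode (4 x 32-bit lanes,
   128 bits) for SImode elements whenever SSE is enabled, and widens
   SFmode to V8SFmode (256 bits) only when AVX is enabled and 128-bit
   vectors are not preferred.  */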
34787
34788 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
34789 vectors. */
34790
34791 static unsigned int
34792 ix86_autovectorize_vector_sizes (void)
34793 {
34794 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
34795 }
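/* Note (added): the value returned above is interpreted as a set of
   vector sizes in bytes; 32 | 16 asks the vectorizer to try both
   256-bit and 128-bit vectors, while 0 falls back to the single
   preferred mode from ix86_preferred_simd_mode.  */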
34796
34797 /* Initialize the GCC target structure. */
34798 #undef TARGET_RETURN_IN_MEMORY
34799 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
34800
34801 #undef TARGET_LEGITIMIZE_ADDRESS
34802 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
34803
34804 #undef TARGET_ATTRIBUTE_TABLE
34805 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
34806 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34807 # undef TARGET_MERGE_DECL_ATTRIBUTES
34808 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
34809 #endif
34810
34811 #undef TARGET_COMP_TYPE_ATTRIBUTES
34812 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
34813
34814 #undef TARGET_INIT_BUILTINS
34815 #define TARGET_INIT_BUILTINS ix86_init_builtins
34816 #undef TARGET_BUILTIN_DECL
34817 #define TARGET_BUILTIN_DECL ix86_builtin_decl
34818 #undef TARGET_EXPAND_BUILTIN
34819 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
34820
34821 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
34822 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
34823 ix86_builtin_vectorized_function
34824
34825 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
34826 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
34827
34828 #undef TARGET_BUILTIN_RECIPROCAL
34829 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
34830
34831 #undef TARGET_ASM_FUNCTION_EPILOGUE
34832 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
34833
34834 #undef TARGET_ENCODE_SECTION_INFO
34835 #ifndef SUBTARGET_ENCODE_SECTION_INFO
34836 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
34837 #else
34838 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
34839 #endif
34840
34841 #undef TARGET_ASM_OPEN_PAREN
34842 #define TARGET_ASM_OPEN_PAREN ""
34843 #undef TARGET_ASM_CLOSE_PAREN
34844 #define TARGET_ASM_CLOSE_PAREN ""
34845
34846 #undef TARGET_ASM_BYTE_OP
34847 #define TARGET_ASM_BYTE_OP ASM_BYTE
34848
34849 #undef TARGET_ASM_ALIGNED_HI_OP
34850 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
34851 #undef TARGET_ASM_ALIGNED_SI_OP
34852 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
34853 #ifdef ASM_QUAD
34854 #undef TARGET_ASM_ALIGNED_DI_OP
34855 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
34856 #endif
34857
34858 #undef TARGET_PROFILE_BEFORE_PROLOGUE
34859 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
34860
34861 #undef TARGET_ASM_UNALIGNED_HI_OP
34862 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
34863 #undef TARGET_ASM_UNALIGNED_SI_OP
34864 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
34865 #undef TARGET_ASM_UNALIGNED_DI_OP
34866 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
34867
34868 #undef TARGET_PRINT_OPERAND
34869 #define TARGET_PRINT_OPERAND ix86_print_operand
34870 #undef TARGET_PRINT_OPERAND_ADDRESS
34871 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
34872 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
34873 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
34874 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
34875 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
34876
34877 #undef TARGET_SCHED_INIT_GLOBAL
34878 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
34879 #undef TARGET_SCHED_ADJUST_COST
34880 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
34881 #undef TARGET_SCHED_ISSUE_RATE
34882 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
34883 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
34884 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
34885 ia32_multipass_dfa_lookahead
34886
34887 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
34888 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
34889
34890 #ifdef HAVE_AS_TLS
34891 #undef TARGET_HAVE_TLS
34892 #define TARGET_HAVE_TLS true
34893 #endif
34894 #undef TARGET_CANNOT_FORCE_CONST_MEM
34895 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
34896 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
34897 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
34898
34899 #undef TARGET_DELEGITIMIZE_ADDRESS
34900 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
34901
34902 #undef TARGET_MS_BITFIELD_LAYOUT_P
34903 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
34904
34905 #if TARGET_MACHO
34906 #undef TARGET_BINDS_LOCAL_P
34907 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
34908 #endif
34909 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34910 #undef TARGET_BINDS_LOCAL_P
34911 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
34912 #endif
34913
34914 #undef TARGET_ASM_OUTPUT_MI_THUNK
34915 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
34916 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
34917 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
34918
34919 #undef TARGET_ASM_FILE_START
34920 #define TARGET_ASM_FILE_START x86_file_start
34921
34922 #undef TARGET_OPTION_OVERRIDE
34923 #define TARGET_OPTION_OVERRIDE ix86_option_override
34924
34925 #undef TARGET_REGISTER_MOVE_COST
34926 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
34927 #undef TARGET_MEMORY_MOVE_COST
34928 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
34929 #undef TARGET_RTX_COSTS
34930 #define TARGET_RTX_COSTS ix86_rtx_costs
34931 #undef TARGET_ADDRESS_COST
34932 #define TARGET_ADDRESS_COST ix86_address_cost
34933
34934 #undef TARGET_FIXED_CONDITION_CODE_REGS
34935 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
34936 #undef TARGET_CC_MODES_COMPATIBLE
34937 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
34938
34939 #undef TARGET_MACHINE_DEPENDENT_REORG
34940 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
34941
34942 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
34943 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
34944
34945 #undef TARGET_BUILD_BUILTIN_VA_LIST
34946 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
34947
34948 #undef TARGET_ENUM_VA_LIST_P
34949 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
34950
34951 #undef TARGET_FN_ABI_VA_LIST
34952 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
34953
34954 #undef TARGET_CANONICAL_VA_LIST_TYPE
34955 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
34956
34957 #undef TARGET_EXPAND_BUILTIN_VA_START
34958 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
34959
34960 #undef TARGET_MD_ASM_CLOBBERS
34961 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
34962
34963 #undef TARGET_PROMOTE_PROTOTYPES
34964 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
34965 #undef TARGET_STRUCT_VALUE_RTX
34966 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
34967 #undef TARGET_SETUP_INCOMING_VARARGS
34968 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
34969 #undef TARGET_MUST_PASS_IN_STACK
34970 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
34971 #undef TARGET_FUNCTION_ARG_ADVANCE
34972 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
34973 #undef TARGET_FUNCTION_ARG
34974 #define TARGET_FUNCTION_ARG ix86_function_arg
34975 #undef TARGET_FUNCTION_ARG_BOUNDARY
34976 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
34977 #undef TARGET_PASS_BY_REFERENCE
34978 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
34979 #undef TARGET_INTERNAL_ARG_POINTER
34980 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
34981 #undef TARGET_UPDATE_STACK_BOUNDARY
34982 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
34983 #undef TARGET_GET_DRAP_RTX
34984 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
34985 #undef TARGET_STRICT_ARGUMENT_NAMING
34986 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
34987 #undef TARGET_STATIC_CHAIN
34988 #define TARGET_STATIC_CHAIN ix86_static_chain
34989 #undef TARGET_TRAMPOLINE_INIT
34990 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
34991 #undef TARGET_RETURN_POPS_ARGS
34992 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
34993
34994 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
34995 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
34996
34997 #undef TARGET_SCALAR_MODE_SUPPORTED_P
34998 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
34999
35000 #undef TARGET_VECTOR_MODE_SUPPORTED_P
35001 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
35002
35003 #undef TARGET_C_MODE_FOR_SUFFIX
35004 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
35005
35006 #ifdef HAVE_AS_TLS
35007 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
35008 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
35009 #endif
35010
35011 #ifdef SUBTARGET_INSERT_ATTRIBUTES
35012 #undef TARGET_INSERT_ATTRIBUTES
35013 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
35014 #endif
35015
35016 #undef TARGET_MANGLE_TYPE
35017 #define TARGET_MANGLE_TYPE ix86_mangle_type
35018
35019 #ifndef TARGET_MACHO
35020 #undef TARGET_STACK_PROTECT_FAIL
35021 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
35022 #endif
35023
35024 #undef TARGET_FUNCTION_VALUE
35025 #define TARGET_FUNCTION_VALUE ix86_function_value
35026
35027 #undef TARGET_FUNCTION_VALUE_REGNO_P
35028 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
35029
35030 #undef TARGET_PROMOTE_FUNCTION_MODE
35031 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
35032
35033 #undef TARGET_SECONDARY_RELOAD
35034 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
35035
35036 #undef TARGET_CLASS_MAX_NREGS
35037 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
35038
35039 #undef TARGET_PREFERRED_RELOAD_CLASS
35040 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
35041 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
35042 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
35043 #undef TARGET_CLASS_LIKELY_SPILLED_P
35044 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
35045
35046 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
35047 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
35048 ix86_builtin_vectorization_cost
35049 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
35050 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
35051 ix86_vectorize_builtin_vec_perm
35052 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
35053 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
35054 ix86_vectorize_builtin_vec_perm_ok
35055 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
35056 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
35057 ix86_preferred_simd_mode
35058 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
35059 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
35060 ix86_autovectorize_vector_sizes
35061
35062 #undef TARGET_SET_CURRENT_FUNCTION
35063 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
35064
35065 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
35066 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
35067
35068 #undef TARGET_OPTION_SAVE
35069 #define TARGET_OPTION_SAVE ix86_function_specific_save
35070
35071 #undef TARGET_OPTION_RESTORE
35072 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
35073
35074 #undef TARGET_OPTION_PRINT
35075 #define TARGET_OPTION_PRINT ix86_function_specific_print
35076
35077 #undef TARGET_CAN_INLINE_P
35078 #define TARGET_CAN_INLINE_P ix86_can_inline_p
35079
35080 #undef TARGET_EXPAND_TO_RTL_HOOK
35081 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
35082
35083 #undef TARGET_LEGITIMATE_ADDRESS_P
35084 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
35085
35086 #undef TARGET_LEGITIMATE_CONSTANT_P
35087 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
35088
35089 #undef TARGET_FRAME_POINTER_REQUIRED
35090 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
35091
35092 #undef TARGET_CAN_ELIMINATE
35093 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
35094
35095 #undef TARGET_EXTRA_LIVE_ON_ENTRY
35096 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
35097
35098 #undef TARGET_ASM_CODE_END
35099 #define TARGET_ASM_CODE_END ix86_code_end
35100
35101 #undef TARGET_CONDITIONAL_REGISTER_USAGE
35102 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
35103
35104 #if TARGET_MACHO
35105 #undef TARGET_INIT_LIBFUNCS
35106 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
35107 #endif
35108
35109 struct gcc_target targetm = TARGET_INITIALIZER;
35110 \f
35111 #include "gt-i386.h"