re PR target/50603 ([x32] Unnecessary lea)
[gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "diagnostic-core.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
50 #include "cgraph.h"
51 #include "gimple.h"
52 #include "dwarf2.h"
53 #include "df.h"
54 #include "tm-constrs.h"
55 #include "params.h"
56 #include "cselib.h"
57 #include "debug.h"
58 #include "sched-int.h"
59 #include "sbitmap.h"
60 #include "fibheap.h"
61 #include "opts.h"
62 #include "diagnostic.h"
63
64 enum upper_128bits_state
65 {
66 unknown = 0,
67 unused,
68 used
69 };
70
71 typedef struct block_info_def
72 {
73 /* State of the upper 128bits of AVX registers at exit. */
74 enum upper_128bits_state state;
75 /* TRUE if state of the upper 128bits of AVX registers is unchanged
76 in this block. */
77 bool unchanged;
78 /* TRUE if block has been processed. */
79 bool processed;
80 /* TRUE if block has been scanned. */
81 bool scanned;
82 /* Previous state of the upper 128bits of AVX registers at entry. */
83 enum upper_128bits_state prev;
84 } *block_info;
85
86 #define BLOCK_INFO(B) ((block_info) (B)->aux)
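/* Note: the aux field read by BLOCK_INFO is only meaningful while the
   vzeroupper pass below is running; it is set up by
   alloc_aux_for_blocks (sizeof (struct block_info_def)) in
   move_or_delete_vzeroupper and released again by
   free_aux_for_blocks ().  */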
87
88 enum call_avx256_state
89 {
90 /* Callee returns 256bit AVX register. */
91 callee_return_avx256 = -1,
92 /* Callee returns and passes 256bit AVX register. */
93 callee_return_pass_avx256,
94 /* Callee passes 256bit AVX register. */
95 callee_pass_avx256,
96 /* Callee neither returns nor passes a 256bit AVX register, or no
97 256bit AVX register in function return. */
98 call_no_avx256,
99 /* vzeroupper intrinsic. */
100 vzeroupper_intrinsic
101 };
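/* For example, a vzeroupper emitted for a call whose callee both
   receives and returns a 256bit AVX value carries
   callee_return_pass_avx256 as the first operand of its
   UNSPEC_VOLATILE pattern; move_or_delete_vzeroupper_2 below reads
   the value back with INTVAL (XVECEXP (pat, 0, 0)).  */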
102
103 /* Check if a 256bit AVX register is referenced in stores. */
104
105 static void
106 check_avx256_stores (rtx dest, const_rtx set, void *data)
107 {
108 if ((REG_P (dest)
109 && VALID_AVX256_REG_MODE (GET_MODE (dest)))
110 || (GET_CODE (set) == SET
111 && REG_P (SET_SRC (set))
112 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
113 {
114 enum upper_128bits_state *state
115 = (enum upper_128bits_state *) data;
116 *state = used;
117 }
118 }
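/* Typical use of the callback above, as in move_or_delete_vzeroupper_2
   below:

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);

   STATE flips to `used' as soon as a store in INSN references a 256bit
   AVX register.  */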
119
120 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
121 in basic block BB. Delete it if upper 128bit AVX registers are
122 unused. If it isn't deleted, move it to just before a jump or call insn.
123
124 STATE is state of the upper 128bits of AVX registers at entry. */
125
126 static void
127 move_or_delete_vzeroupper_2 (basic_block bb,
128 enum upper_128bits_state state)
129 {
130 rtx insn, bb_end;
131 rtx vzeroupper_insn = NULL_RTX;
132 rtx pat;
133 int avx256;
134 bool unchanged;
135
136 if (BLOCK_INFO (bb)->unchanged)
137 {
138 if (dump_file)
139 fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
140 bb->index, state);
141
142 BLOCK_INFO (bb)->state = state;
143 return;
144 }
145
146 if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
147 {
148 if (dump_file)
149 fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
150 bb->index, BLOCK_INFO (bb)->state);
151 return;
152 }
153
154 BLOCK_INFO (bb)->prev = state;
155
156 if (dump_file)
157 fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
158 bb->index, state);
159
160 unchanged = true;
161
162 /* BB_END changes when the insn it points to is deleted; remember the original block end. */
163 bb_end = BB_END (bb);
164 insn = BB_HEAD (bb);
165 while (insn != bb_end)
166 {
167 insn = NEXT_INSN (insn);
168
169 if (!NONDEBUG_INSN_P (insn))
170 continue;
171
172 /* Move vzeroupper before jump/call. */
173 if (JUMP_P (insn) || CALL_P (insn))
174 {
175 if (!vzeroupper_insn)
176 continue;
177
178 if (PREV_INSN (insn) != vzeroupper_insn)
179 {
180 if (dump_file)
181 {
182 fprintf (dump_file, "Move vzeroupper after:\n");
183 print_rtl_single (dump_file, PREV_INSN (insn));
184 fprintf (dump_file, "before:\n");
185 print_rtl_single (dump_file, insn);
186 }
187 reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
188 PREV_INSN (insn));
189 }
190 vzeroupper_insn = NULL_RTX;
191 continue;
192 }
193
194 pat = PATTERN (insn);
195
196 /* Check insn for vzeroupper intrinsic. */
197 if (GET_CODE (pat) == UNSPEC_VOLATILE
198 && XINT (pat, 1) == UNSPECV_VZEROUPPER)
199 {
200 if (dump_file)
201 {
202 /* Found vzeroupper intrinsic. */
203 fprintf (dump_file, "Found vzeroupper:\n");
204 print_rtl_single (dump_file, insn);
205 }
206 }
207 else
208 {
209 /* Check insn for vzeroall intrinsic. */
210 if (GET_CODE (pat) == PARALLEL
211 && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
212 && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
213 {
214 state = unused;
215 unchanged = false;
216
217 /* Delete pending vzeroupper insertion. */
218 if (vzeroupper_insn)
219 {
220 delete_insn (vzeroupper_insn);
221 vzeroupper_insn = NULL_RTX;
222 }
223 }
224 else if (state != used)
225 {
226 note_stores (pat, check_avx256_stores, &state);
227 if (state == used)
228 unchanged = false;
229 }
230 continue;
231 }
232
233 /* Process vzeroupper intrinsic. */
234 avx256 = INTVAL (XVECEXP (pat, 0, 0));
235
236 if (state == unused)
237 {
238 /* Since the upper 128bits are cleared, callee must not pass
239 256bit AVX register. We only need to check if callee
240 returns 256bit AVX register. */
241 if (avx256 == callee_return_avx256)
242 {
243 state = used;
244 unchanged = false;
245 }
246
247 /* Remove unnecessary vzeroupper since upper 128bits are
248 cleared. */
249 if (dump_file)
250 {
251 fprintf (dump_file, "Delete redundant vzeroupper:\n");
252 print_rtl_single (dump_file, insn);
253 }
254 delete_insn (insn);
255 }
256 else
257 {
258 /* Set state to UNUSED if callee doesn't return 256bit AVX
259 register. */
260 if (avx256 != callee_return_pass_avx256)
261 state = unused;
262
263 if (avx256 == callee_return_pass_avx256
264 || avx256 == callee_pass_avx256)
265 {
266 /* Must remove vzeroupper since callee passes in 256bit
267 AVX register. */
268 if (dump_file)
269 {
270 fprintf (dump_file, "Delete callee pass vzeroupper:\n");
271 print_rtl_single (dump_file, insn);
272 }
273 delete_insn (insn);
274 }
275 else
276 {
277 vzeroupper_insn = insn;
278 unchanged = false;
279 }
280 }
281 }
282
283 BLOCK_INFO (bb)->state = state;
284 BLOCK_INFO (bb)->unchanged = unchanged;
285 BLOCK_INFO (bb)->scanned = true;
286
287 if (dump_file)
288 fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
289 bb->index, unchanged ? "unchanged" : "changed",
290 state);
291 }
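/* A small worked example of the scan above: if STATE is already
   `unused' when a vzeroupper whose operand is call_no_avx256 is
   reached, the insn is deleted as redundant, since the upper halves
   are known to be clear and the callee neither receives nor returns a
   256bit value.  A vzeroall insn resets STATE to `unused' and deletes
   any vzeroupper still pending.  */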
292
293 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
294 in BLOCK and check its predecessor blocks. Treat UNKNOWN state
295 as USED if UNKNOWN_IS_UNUSED is true. Return TRUE if the exit
296 state is changed. */
297
298 static bool
299 move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
300 {
301 edge e;
302 edge_iterator ei;
303 enum upper_128bits_state state, old_state, new_state;
304 bool seen_unknown;
305
306 if (dump_file)
307 fprintf (dump_file, " Process [bb %i]: status: %d\n",
308 block->index, BLOCK_INFO (block)->processed);
309
310 if (BLOCK_INFO (block)->processed)
311 return false;
312
313 state = unused;
314
315 /* Check all predecessor edges of this block. */
316 seen_unknown = false;
317 FOR_EACH_EDGE (e, ei, block->preds)
318 {
319 if (e->src == block)
320 continue;
321 switch (BLOCK_INFO (e->src)->state)
322 {
323 case unknown:
324 if (!unknown_is_unused)
325 seen_unknown = true;
326 case unused:
327 break;
328 case used:
329 state = used;
330 goto done;
331 }
332 }
333
334 if (seen_unknown)
335 state = unknown;
336
337 done:
338 old_state = BLOCK_INFO (block)->state;
339 move_or_delete_vzeroupper_2 (block, state);
340 new_state = BLOCK_INFO (block)->state;
341
342 if (state != unknown || new_state == used)
343 BLOCK_INFO (block)->processed = true;
344
345 /* Need to rescan if the upper 128bits of AVX registers are changed
346 to USED at exit. */
347 if (new_state != old_state)
348 {
349 if (new_state == used)
350 cfun->machine->rescan_vzeroupper_p = 1;
351 return true;
352 }
353 else
354 return false;
355 }
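/* Worked example of the merge above: with two predecessors whose exit
   states are `unused' and `unknown', the block is scanned with
   STATE == unknown when UNKNOWN_IS_UNUSED is false (the iterative
   passes) and with STATE == unused when it is true (the final pass);
   a single `used' predecessor forces STATE == used regardless.  */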
356
357 /* Go through the instruction stream looking for vzeroupper. Delete
358 it if upper 128bit AVX registers are unused. If it isn't deleted,
359 move it to just before a jump or call insn. */
360
361 static void
362 move_or_delete_vzeroupper (void)
363 {
364 edge e;
365 edge_iterator ei;
366 basic_block bb;
367 fibheap_t worklist, pending, fibheap_swap;
368 sbitmap visited, in_worklist, in_pending, sbitmap_swap;
369 int *bb_order;
370 int *rc_order;
371 int i;
372
373 /* Set up block info for each basic block. */
374 alloc_aux_for_blocks (sizeof (struct block_info_def));
375
376 /* Process outgoing edges of entry point. */
377 if (dump_file)
378 fprintf (dump_file, "Process outgoing edges of entry point\n");
379
380 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
381 {
382 move_or_delete_vzeroupper_2 (e->dest,
383 cfun->machine->caller_pass_avx256_p
384 ? used : unused);
385 BLOCK_INFO (e->dest)->processed = true;
386 }
387
388 /* Compute reverse completion order of depth first search of the CFG
389 so that the data-flow runs faster. */
390 rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
391 bb_order = XNEWVEC (int, last_basic_block);
392 pre_and_rev_post_order_compute (NULL, rc_order, false);
393 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
394 bb_order[rc_order[i]] = i;
395 free (rc_order);
396
397 worklist = fibheap_new ();
398 pending = fibheap_new ();
399 visited = sbitmap_alloc (last_basic_block);
400 in_worklist = sbitmap_alloc (last_basic_block);
401 in_pending = sbitmap_alloc (last_basic_block);
402 sbitmap_zero (in_worklist);
403
404 /* Don't check outgoing edges of entry point. */
405 sbitmap_ones (in_pending);
406 FOR_EACH_BB (bb)
407 if (BLOCK_INFO (bb)->processed)
408 RESET_BIT (in_pending, bb->index);
409 else
410 {
411 move_or_delete_vzeroupper_1 (bb, false);
412 fibheap_insert (pending, bb_order[bb->index], bb);
413 }
414
415 if (dump_file)
416 fprintf (dump_file, "Check remaining basic blocks\n");
417
418 while (!fibheap_empty (pending))
419 {
420 fibheap_swap = pending;
421 pending = worklist;
422 worklist = fibheap_swap;
423 sbitmap_swap = in_pending;
424 in_pending = in_worklist;
425 in_worklist = sbitmap_swap;
426
427 sbitmap_zero (visited);
428
429 cfun->machine->rescan_vzeroupper_p = 0;
430
431 while (!fibheap_empty (worklist))
432 {
433 bb = (basic_block) fibheap_extract_min (worklist);
434 RESET_BIT (in_worklist, bb->index);
435 gcc_assert (!TEST_BIT (visited, bb->index));
436 if (!TEST_BIT (visited, bb->index))
437 {
438 edge_iterator ei;
439
440 SET_BIT (visited, bb->index);
441
442 if (move_or_delete_vzeroupper_1 (bb, false))
443 FOR_EACH_EDGE (e, ei, bb->succs)
444 {
445 if (e->dest == EXIT_BLOCK_PTR
446 || BLOCK_INFO (e->dest)->processed)
447 continue;
448
449 if (TEST_BIT (visited, e->dest->index))
450 {
451 if (!TEST_BIT (in_pending, e->dest->index))
452 {
453 /* Send E->DEST to next round. */
454 SET_BIT (in_pending, e->dest->index);
455 fibheap_insert (pending,
456 bb_order[e->dest->index],
457 e->dest);
458 }
459 }
460 else if (!TEST_BIT (in_worklist, e->dest->index))
461 {
462 /* Add E->DEST to current round. */
463 SET_BIT (in_worklist, e->dest->index);
464 fibheap_insert (worklist, bb_order[e->dest->index],
465 e->dest);
466 }
467 }
468 }
469 }
470
471 if (!cfun->machine->rescan_vzeroupper_p)
472 break;
473 }
474
475 free (bb_order);
476 fibheap_delete (worklist);
477 fibheap_delete (pending);
478 sbitmap_free (visited);
479 sbitmap_free (in_worklist);
480 sbitmap_free (in_pending);
481
482 if (dump_file)
483 fprintf (dump_file, "Process remaining basic blocks\n");
484
485 FOR_EACH_BB (bb)
486 move_or_delete_vzeroupper_1 (bb, true);
487
488 free_aux_for_blocks ();
489 }
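/* The loop above is a standard two-list worklist iteration: when a
   block's exit state changes, its unprocessed successors are queued
   on WORKLIST (current sweep) if not yet visited, or on PENDING (next
   sweep) if they were.  Sweeps repeat until no block sets
   rescan_vzeroupper_p, and the final FOR_EACH_BB pass with
   UNKNOWN_IS_UNUSED == true resolves any blocks left in the unknown
   state.  */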
490
491 static rtx legitimize_dllimport_symbol (rtx, bool);
492
493 #ifndef CHECK_STACK_LIMIT
494 #define CHECK_STACK_LIMIT (-1)
495 #endif
496
497 /* Return index of given mode in mult and division cost tables. */
498 #define MODE_INDEX(mode) \
499 ((mode) == QImode ? 0 \
500 : (mode) == HImode ? 1 \
501 : (mode) == SImode ? 2 \
502 : (mode) == DImode ? 3 \
503 : 4)
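/* For instance, MODE_INDEX (SImode) is 2, so the SImode entry of the
   multiply and divide cost arrays below is always the third element,
   and anything wider than DImode falls into the trailing "other" slot
   at index 4.  */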
504
505 /* Processor costs (relative to an add) */
506 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
507 #define COSTS_N_BYTES(N) ((N) * 2)
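/* Worked example of the assumption above: an add is 2 bytes, so its
   size cost COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1); the size
   table below therefore stays on the same scale as the speed tables,
   with one instruction's worth of cost corresponding to two bytes of
   code.  */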
508
509 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
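/* Illustrative reading of the memcpy/memset descriptors in the cost
   tables below (this relies on the stringop_algs layout declared in
   i386.h, which is not shown here): each descriptor comes in a 32bit
   and a 64bit variant; within a variant, the leading algorithm is
   used when the block size is unknown at compile time, and the
   following {max_size, algorithm} pairs, terminated by max_size == -1,
   select the algorithm for known sizes.  DUMMY_STRINGOP_ALGS simply
   says "always use a libcall" and fills the variant a CPU does not
   need.  */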
510
511 const
512 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
513 COSTS_N_BYTES (2), /* cost of an add instruction */
514 COSTS_N_BYTES (3), /* cost of a lea instruction */
515 COSTS_N_BYTES (2), /* variable shift costs */
516 COSTS_N_BYTES (3), /* constant shift costs */
517 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
518 COSTS_N_BYTES (3), /* HI */
519 COSTS_N_BYTES (3), /* SI */
520 COSTS_N_BYTES (3), /* DI */
521 COSTS_N_BYTES (5)}, /* other */
522 0, /* cost of multiply per each bit set */
523 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
524 COSTS_N_BYTES (3), /* HI */
525 COSTS_N_BYTES (3), /* SI */
526 COSTS_N_BYTES (3), /* DI */
527 COSTS_N_BYTES (5)}, /* other */
528 COSTS_N_BYTES (3), /* cost of movsx */
529 COSTS_N_BYTES (3), /* cost of movzx */
530 0, /* "large" insn */
531 2, /* MOVE_RATIO */
532 2, /* cost for loading QImode using movzbl */
533 {2, 2, 2}, /* cost of loading integer registers
534 in QImode, HImode and SImode.
535 Relative to reg-reg move (2). */
536 {2, 2, 2}, /* cost of storing integer registers */
537 2, /* cost of reg,reg fld/fst */
538 {2, 2, 2}, /* cost of loading fp registers
539 in SFmode, DFmode and XFmode */
540 {2, 2, 2}, /* cost of storing fp registers
541 in SFmode, DFmode and XFmode */
542 3, /* cost of moving MMX register */
543 {3, 3}, /* cost of loading MMX registers
544 in SImode and DImode */
545 {3, 3}, /* cost of storing MMX registers
546 in SImode and DImode */
547 3, /* cost of moving SSE register */
548 {3, 3, 3}, /* cost of loading SSE registers
549 in SImode, DImode and TImode */
550 {3, 3, 3}, /* cost of storing SSE registers
551 in SImode, DImode and TImode */
552 3, /* MMX or SSE register to integer */
553 0, /* size of l1 cache */
554 0, /* size of l2 cache */
555 0, /* size of prefetch block */
556 0, /* number of parallel prefetches */
557 2, /* Branch cost */
558 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
559 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
560 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
561 COSTS_N_BYTES (2), /* cost of FABS instruction. */
562 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
563 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
564 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
565 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
566 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
567 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
568 1, /* scalar_stmt_cost. */
569 1, /* scalar load_cost. */
570 1, /* scalar_store_cost. */
571 1, /* vec_stmt_cost. */
572 1, /* vec_to_scalar_cost. */
573 1, /* scalar_to_vec_cost. */
574 1, /* vec_align_load_cost. */
575 1, /* vec_unalign_load_cost. */
576 1, /* vec_store_cost. */
577 1, /* cond_taken_branch_cost. */
578 1, /* cond_not_taken_branch_cost. */
579 };
580
581 /* Processor costs (relative to an add) */
582 static const
583 struct processor_costs i386_cost = { /* 386 specific costs */
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (1), /* cost of a lea instruction */
586 COSTS_N_INSNS (3), /* variable shift costs */
587 COSTS_N_INSNS (2), /* constant shift costs */
588 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (6), /* HI */
590 COSTS_N_INSNS (6), /* SI */
591 COSTS_N_INSNS (6), /* DI */
592 COSTS_N_INSNS (6)}, /* other */
593 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (23), /* HI */
596 COSTS_N_INSNS (23), /* SI */
597 COSTS_N_INSNS (23), /* DI */
598 COSTS_N_INSNS (23)}, /* other */
599 COSTS_N_INSNS (3), /* cost of movsx */
600 COSTS_N_INSNS (2), /* cost of movzx */
601 15, /* "large" insn */
602 3, /* MOVE_RATIO */
603 4, /* cost for loading QImode using movzbl */
604 {2, 4, 2}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {2, 4, 2}, /* cost of storing integer registers */
608 2, /* cost of reg,reg fld/fst */
609 {8, 8, 8}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {8, 8, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 8}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 8}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 8, 16}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 8, 16}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 3, /* MMX or SSE register to integer */
624 0, /* size of l1 cache */
625 0, /* size of l2 cache */
626 0, /* size of prefetch block */
627 0, /* number of parallel prefetches */
628 1, /* Branch cost */
629 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (22), /* cost of FABS instruction. */
633 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
635 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
636 DUMMY_STRINGOP_ALGS},
637 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
638 DUMMY_STRINGOP_ALGS},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
650 };
651
652 static const
653 struct processor_costs i486_cost = { /* 486 specific costs */
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (1), /* cost of a lea instruction */
656 COSTS_N_INSNS (3), /* variable shift costs */
657 COSTS_N_INSNS (2), /* constant shift costs */
658 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (12), /* HI */
660 COSTS_N_INSNS (12), /* SI */
661 COSTS_N_INSNS (12), /* DI */
662 COSTS_N_INSNS (12)}, /* other */
663 1, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (40), /* HI */
666 COSTS_N_INSNS (40), /* SI */
667 COSTS_N_INSNS (40), /* DI */
668 COSTS_N_INSNS (40)}, /* other */
669 COSTS_N_INSNS (3), /* cost of movsx */
670 COSTS_N_INSNS (2), /* cost of movzx */
671 15, /* "large" insn */
672 3, /* MOVE_RATIO */
673 4, /* cost for loading QImode using movzbl */
674 {2, 4, 2}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {2, 4, 2}, /* cost of storing integer registers */
678 2, /* cost of reg,reg fld/fst */
679 {8, 8, 8}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {8, 8, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {4, 8}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 8}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 8, 16}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 8, 16}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 3, /* MMX or SSE register to integer */
694 4, /* size of l1 cache. 486 has 8kB cache
695 shared for code and data, so 4kB is
696 not really precise. */
697 4, /* size of l2 cache */
698 0, /* size of prefetch block */
699 0, /* number of parallel prefetches */
700 1, /* Branch cost */
701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (3), /* cost of FABS instruction. */
705 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
707 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
708 DUMMY_STRINGOP_ALGS},
709 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
710 DUMMY_STRINGOP_ALGS},
711 1, /* scalar_stmt_cost. */
712 1, /* scalar load_cost. */
713 1, /* scalar_store_cost. */
714 1, /* vec_stmt_cost. */
715 1, /* vec_to_scalar_cost. */
716 1, /* scalar_to_vec_cost. */
717 1, /* vec_align_load_cost. */
718 2, /* vec_unalign_load_cost. */
719 1, /* vec_store_cost. */
720 3, /* cond_taken_branch_cost. */
721 1, /* cond_not_taken_branch_cost. */
722 };
723
724 static const
725 struct processor_costs pentium_cost = {
726 COSTS_N_INSNS (1), /* cost of an add instruction */
727 COSTS_N_INSNS (1), /* cost of a lea instruction */
728 COSTS_N_INSNS (4), /* variable shift costs */
729 COSTS_N_INSNS (1), /* constant shift costs */
730 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
731 COSTS_N_INSNS (11), /* HI */
732 COSTS_N_INSNS (11), /* SI */
733 COSTS_N_INSNS (11), /* DI */
734 COSTS_N_INSNS (11)}, /* other */
735 0, /* cost of multiply per each bit set */
736 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
737 COSTS_N_INSNS (25), /* HI */
738 COSTS_N_INSNS (25), /* SI */
739 COSTS_N_INSNS (25), /* DI */
740 COSTS_N_INSNS (25)}, /* other */
741 COSTS_N_INSNS (3), /* cost of movsx */
742 COSTS_N_INSNS (2), /* cost of movzx */
743 8, /* "large" insn */
744 6, /* MOVE_RATIO */
745 6, /* cost for loading QImode using movzbl */
746 {2, 4, 2}, /* cost of loading integer registers
747 in QImode, HImode and SImode.
748 Relative to reg-reg move (2). */
749 {2, 4, 2}, /* cost of storing integer registers */
750 2, /* cost of reg,reg fld/fst */
751 {2, 2, 6}, /* cost of loading fp registers
752 in SFmode, DFmode and XFmode */
753 {4, 4, 6}, /* cost of storing fp registers
754 in SFmode, DFmode and XFmode */
755 8, /* cost of moving MMX register */
756 {8, 8}, /* cost of loading MMX registers
757 in SImode and DImode */
758 {8, 8}, /* cost of storing MMX registers
759 in SImode and DImode */
760 2, /* cost of moving SSE register */
761 {4, 8, 16}, /* cost of loading SSE registers
762 in SImode, DImode and TImode */
763 {4, 8, 16}, /* cost of storing SSE registers
764 in SImode, DImode and TImode */
765 3, /* MMX or SSE register to integer */
766 8, /* size of l1 cache. */
767 8, /* size of l2 cache */
768 0, /* size of prefetch block */
769 0, /* number of parallel prefetches */
770 2, /* Branch cost */
771 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
772 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
773 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
774 COSTS_N_INSNS (1), /* cost of FABS instruction. */
775 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
776 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
777 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
778 DUMMY_STRINGOP_ALGS},
779 {{libcall, {{-1, rep_prefix_4_byte}}},
780 DUMMY_STRINGOP_ALGS},
781 1, /* scalar_stmt_cost. */
782 1, /* scalar load_cost. */
783 1, /* scalar_store_cost. */
784 1, /* vec_stmt_cost. */
785 1, /* vec_to_scalar_cost. */
786 1, /* scalar_to_vec_cost. */
787 1, /* vec_align_load_cost. */
788 2, /* vec_unalign_load_cost. */
789 1, /* vec_store_cost. */
790 3, /* cond_taken_branch_cost. */
791 1, /* cond_not_taken_branch_cost. */
792 };
793
794 static const
795 struct processor_costs pentiumpro_cost = {
796 COSTS_N_INSNS (1), /* cost of an add instruction */
797 COSTS_N_INSNS (1), /* cost of a lea instruction */
798 COSTS_N_INSNS (1), /* variable shift costs */
799 COSTS_N_INSNS (1), /* constant shift costs */
800 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
801 COSTS_N_INSNS (4), /* HI */
802 COSTS_N_INSNS (4), /* SI */
803 COSTS_N_INSNS (4), /* DI */
804 COSTS_N_INSNS (4)}, /* other */
805 0, /* cost of multiply per each bit set */
806 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
807 COSTS_N_INSNS (17), /* HI */
808 COSTS_N_INSNS (17), /* SI */
809 COSTS_N_INSNS (17), /* DI */
810 COSTS_N_INSNS (17)}, /* other */
811 COSTS_N_INSNS (1), /* cost of movsx */
812 COSTS_N_INSNS (1), /* cost of movzx */
813 8, /* "large" insn */
814 6, /* MOVE_RATIO */
815 2, /* cost for loading QImode using movzbl */
816 {4, 4, 4}, /* cost of loading integer registers
817 in QImode, HImode and SImode.
818 Relative to reg-reg move (2). */
819 {2, 2, 2}, /* cost of storing integer registers */
820 2, /* cost of reg,reg fld/fst */
821 {2, 2, 6}, /* cost of loading fp registers
822 in SFmode, DFmode and XFmode */
823 {4, 4, 6}, /* cost of storing fp registers
824 in SFmode, DFmode and XFmode */
825 2, /* cost of moving MMX register */
826 {2, 2}, /* cost of loading MMX registers
827 in SImode and DImode */
828 {2, 2}, /* cost of storing MMX registers
829 in SImode and DImode */
830 2, /* cost of moving SSE register */
831 {2, 2, 8}, /* cost of loading SSE registers
832 in SImode, DImode and TImode */
833 {2, 2, 8}, /* cost of storing SSE registers
834 in SImode, DImode and TImode */
835 3, /* MMX or SSE register to integer */
836 8, /* size of l1 cache. */
837 256, /* size of l2 cache */
838 32, /* size of prefetch block */
839 6, /* number of parallel prefetches */
840 2, /* Branch cost */
841 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
842 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
843 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
844 COSTS_N_INSNS (2), /* cost of FABS instruction. */
845 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
846 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
847 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
848 (we ensure the alignment). For small blocks an inline loop is still a
849 noticeable win; for bigger blocks either rep movsl or rep movsb is the
850 way to go. Rep movsb apparently has a more expensive startup time in the
851 CPU, but after 4K the difference is down in the noise. */
852 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
853 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
854 DUMMY_STRINGOP_ALGS},
855 {{rep_prefix_4_byte, {{1024, unrolled_loop},
856 {8192, rep_prefix_4_byte}, {-1, libcall}}},
857 DUMMY_STRINGOP_ALGS},
858 1, /* scalar_stmt_cost. */
859 1, /* scalar load_cost. */
860 1, /* scalar_store_cost. */
861 1, /* vec_stmt_cost. */
862 1, /* vec_to_scalar_cost. */
863 1, /* scalar_to_vec_cost. */
864 1, /* vec_align_load_cost. */
865 2, /* vec_unalign_load_cost. */
866 1, /* vec_store_cost. */
867 3, /* cond_taken_branch_cost. */
868 1, /* cond_not_taken_branch_cost. */
869 };
870
871 static const
872 struct processor_costs geode_cost = {
873 COSTS_N_INSNS (1), /* cost of an add instruction */
874 COSTS_N_INSNS (1), /* cost of a lea instruction */
875 COSTS_N_INSNS (2), /* variable shift costs */
876 COSTS_N_INSNS (1), /* constant shift costs */
877 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
878 COSTS_N_INSNS (4), /* HI */
879 COSTS_N_INSNS (7), /* SI */
880 COSTS_N_INSNS (7), /* DI */
881 COSTS_N_INSNS (7)}, /* other */
882 0, /* cost of multiply per each bit set */
883 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
884 COSTS_N_INSNS (23), /* HI */
885 COSTS_N_INSNS (39), /* SI */
886 COSTS_N_INSNS (39), /* DI */
887 COSTS_N_INSNS (39)}, /* other */
888 COSTS_N_INSNS (1), /* cost of movsx */
889 COSTS_N_INSNS (1), /* cost of movzx */
890 8, /* "large" insn */
891 4, /* MOVE_RATIO */
892 1, /* cost for loading QImode using movzbl */
893 {1, 1, 1}, /* cost of loading integer registers
894 in QImode, HImode and SImode.
895 Relative to reg-reg move (2). */
896 {1, 1, 1}, /* cost of storing integer registers */
897 1, /* cost of reg,reg fld/fst */
898 {1, 1, 1}, /* cost of loading fp registers
899 in SFmode, DFmode and XFmode */
900 {4, 6, 6}, /* cost of storing fp registers
901 in SFmode, DFmode and XFmode */
902
903 1, /* cost of moving MMX register */
904 {1, 1}, /* cost of loading MMX registers
905 in SImode and DImode */
906 {1, 1}, /* cost of storing MMX registers
907 in SImode and DImode */
908 1, /* cost of moving SSE register */
909 {1, 1, 1}, /* cost of loading SSE registers
910 in SImode, DImode and TImode */
911 {1, 1, 1}, /* cost of storing SSE registers
912 in SImode, DImode and TImode */
913 1, /* MMX or SSE register to integer */
914 64, /* size of l1 cache. */
915 128, /* size of l2 cache. */
916 32, /* size of prefetch block */
917 1, /* number of parallel prefetches */
918 1, /* Branch cost */
919 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
920 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
921 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
922 COSTS_N_INSNS (1), /* cost of FABS instruction. */
923 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
924 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
925 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
926 DUMMY_STRINGOP_ALGS},
927 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
928 DUMMY_STRINGOP_ALGS},
929 1, /* scalar_stmt_cost. */
930 1, /* scalar load_cost. */
931 1, /* scalar_store_cost. */
932 1, /* vec_stmt_cost. */
933 1, /* vec_to_scalar_cost. */
934 1, /* scalar_to_vec_cost. */
935 1, /* vec_align_load_cost. */
936 2, /* vec_unalign_load_cost. */
937 1, /* vec_store_cost. */
938 3, /* cond_taken_branch_cost. */
939 1, /* cond_not_taken_branch_cost. */
940 };
941
942 static const
943 struct processor_costs k6_cost = {
944 COSTS_N_INSNS (1), /* cost of an add instruction */
945 COSTS_N_INSNS (2), /* cost of a lea instruction */
946 COSTS_N_INSNS (1), /* variable shift costs */
947 COSTS_N_INSNS (1), /* constant shift costs */
948 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
949 COSTS_N_INSNS (3), /* HI */
950 COSTS_N_INSNS (3), /* SI */
951 COSTS_N_INSNS (3), /* DI */
952 COSTS_N_INSNS (3)}, /* other */
953 0, /* cost of multiply per each bit set */
954 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
955 COSTS_N_INSNS (18), /* HI */
956 COSTS_N_INSNS (18), /* SI */
957 COSTS_N_INSNS (18), /* DI */
958 COSTS_N_INSNS (18)}, /* other */
959 COSTS_N_INSNS (2), /* cost of movsx */
960 COSTS_N_INSNS (2), /* cost of movzx */
961 8, /* "large" insn */
962 4, /* MOVE_RATIO */
963 3, /* cost for loading QImode using movzbl */
964 {4, 5, 4}, /* cost of loading integer registers
965 in QImode, HImode and SImode.
966 Relative to reg-reg move (2). */
967 {2, 3, 2}, /* cost of storing integer registers */
968 4, /* cost of reg,reg fld/fst */
969 {6, 6, 6}, /* cost of loading fp registers
970 in SFmode, DFmode and XFmode */
971 {4, 4, 4}, /* cost of storing fp registers
972 in SFmode, DFmode and XFmode */
973 2, /* cost of moving MMX register */
974 {2, 2}, /* cost of loading MMX registers
975 in SImode and DImode */
976 {2, 2}, /* cost of storing MMX registers
977 in SImode and DImode */
978 2, /* cost of moving SSE register */
979 {2, 2, 8}, /* cost of loading SSE registers
980 in SImode, DImode and TImode */
981 {2, 2, 8}, /* cost of storing SSE registers
982 in SImode, DImode and TImode */
983 6, /* MMX or SSE register to integer */
984 32, /* size of l1 cache. */
985 32, /* size of l2 cache. Some models
986 have integrated l2 cache, but
987 optimizing for k6 is not important
988 enough to worry about that. */
989 32, /* size of prefetch block */
990 1, /* number of parallel prefetches */
991 1, /* Branch cost */
992 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
993 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
994 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
995 COSTS_N_INSNS (2), /* cost of FABS instruction. */
996 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
997 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
998 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
999 DUMMY_STRINGOP_ALGS},
1000 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1001 DUMMY_STRINGOP_ALGS},
1002 1, /* scalar_stmt_cost. */
1003 1, /* scalar load_cost. */
1004 1, /* scalar_store_cost. */
1005 1, /* vec_stmt_cost. */
1006 1, /* vec_to_scalar_cost. */
1007 1, /* scalar_to_vec_cost. */
1008 1, /* vec_align_load_cost. */
1009 2, /* vec_unalign_load_cost. */
1010 1, /* vec_store_cost. */
1011 3, /* cond_taken_branch_cost. */
1012 1, /* cond_not_taken_branch_cost. */
1013 };
1014
1015 static const
1016 struct processor_costs athlon_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (2), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (5), /* HI */
1023 COSTS_N_INSNS (5), /* SI */
1024 COSTS_N_INSNS (5), /* DI */
1025 COSTS_N_INSNS (5)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (26), /* HI */
1029 COSTS_N_INSNS (42), /* SI */
1030 COSTS_N_INSNS (74), /* DI */
1031 COSTS_N_INSNS (74)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {3, 4, 3}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {3, 4, 3}, /* cost of storing integer registers */
1041 4, /* cost of reg,reg fld/fst */
1042 {4, 4, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {6, 6, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 6}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 5}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 5, /* MMX or SSE register to integer */
1057 64, /* size of l1 cache. */
1058 256, /* size of l2 cache. */
1059 64, /* size of prefetch block */
1060 6, /* number of parallel prefetches */
1061 5, /* Branch cost */
1062 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1063 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1064 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1065 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1066 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1067 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1068 /* For some reason, Athlon handles the REP prefix better (relative to loops)
1069 than K8 does. Alignment becomes important after 8 bytes for memcpy and
1070 128 bytes for memset. */
1071 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1072 DUMMY_STRINGOP_ALGS},
1073 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1074 DUMMY_STRINGOP_ALGS},
1075 1, /* scalar_stmt_cost. */
1076 1, /* scalar load_cost. */
1077 1, /* scalar_store_cost. */
1078 1, /* vec_stmt_cost. */
1079 1, /* vec_to_scalar_cost. */
1080 1, /* scalar_to_vec_cost. */
1081 1, /* vec_align_load_cost. */
1082 2, /* vec_unalign_load_cost. */
1083 1, /* vec_store_cost. */
1084 3, /* cond_taken_branch_cost. */
1085 1, /* cond_not_taken_branch_cost. */
1086 };
1087
1088 static const
1089 struct processor_costs k8_cost = {
1090 COSTS_N_INSNS (1), /* cost of an add instruction */
1091 COSTS_N_INSNS (2), /* cost of a lea instruction */
1092 COSTS_N_INSNS (1), /* variable shift costs */
1093 COSTS_N_INSNS (1), /* constant shift costs */
1094 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1095 COSTS_N_INSNS (4), /* HI */
1096 COSTS_N_INSNS (3), /* SI */
1097 COSTS_N_INSNS (4), /* DI */
1098 COSTS_N_INSNS (5)}, /* other */
1099 0, /* cost of multiply per each bit set */
1100 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1101 COSTS_N_INSNS (26), /* HI */
1102 COSTS_N_INSNS (42), /* SI */
1103 COSTS_N_INSNS (74), /* DI */
1104 COSTS_N_INSNS (74)}, /* other */
1105 COSTS_N_INSNS (1), /* cost of movsx */
1106 COSTS_N_INSNS (1), /* cost of movzx */
1107 8, /* "large" insn */
1108 9, /* MOVE_RATIO */
1109 4, /* cost for loading QImode using movzbl */
1110 {3, 4, 3}, /* cost of loading integer registers
1111 in QImode, HImode and SImode.
1112 Relative to reg-reg move (2). */
1113 {3, 4, 3}, /* cost of storing integer registers */
1114 4, /* cost of reg,reg fld/fst */
1115 {4, 4, 12}, /* cost of loading fp registers
1116 in SFmode, DFmode and XFmode */
1117 {6, 6, 8}, /* cost of storing fp registers
1118 in SFmode, DFmode and XFmode */
1119 2, /* cost of moving MMX register */
1120 {3, 3}, /* cost of loading MMX registers
1121 in SImode and DImode */
1122 {4, 4}, /* cost of storing MMX registers
1123 in SImode and DImode */
1124 2, /* cost of moving SSE register */
1125 {4, 3, 6}, /* cost of loading SSE registers
1126 in SImode, DImode and TImode */
1127 {4, 4, 5}, /* cost of storing SSE registers
1128 in SImode, DImode and TImode */
1129 5, /* MMX or SSE register to integer */
1130 64, /* size of l1 cache. */
1131 512, /* size of l2 cache. */
1132 64, /* size of prefetch block */
1133 /* New AMD processors never drop prefetches; if they cannot be performed
1134 immediately, they are queued. We set number of simultaneous prefetches
1135 to a large constant to reflect this (it probably is not a good idea not
1136 to limit number of prefetches at all, as their execution also takes some
1137 time). */
1138 100, /* number of parallel prefetches */
1139 3, /* Branch cost */
1140 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1141 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1142 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1143 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1144 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1145 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1146 /* K8 has optimized REP instruction for medium sized blocks, but for very
1147 small blocks it is better to use loop. For large blocks, libcall can
1148 do non-temporal accesses and beat inline considerably. */
1149 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1150 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1151 {{libcall, {{8, loop}, {24, unrolled_loop},
1152 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1153 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1154 4, /* scalar_stmt_cost. */
1155 2, /* scalar load_cost. */
1156 2, /* scalar_store_cost. */
1157 5, /* vec_stmt_cost. */
1158 0, /* vec_to_scalar_cost. */
1159 2, /* scalar_to_vec_cost. */
1160 2, /* vec_align_load_cost. */
1161 3, /* vec_unalign_load_cost. */
1162 3, /* vec_store_cost. */
1163 3, /* cond_taken_branch_cost. */
1164 2, /* cond_not_taken_branch_cost. */
1165 };
1166
1167 struct processor_costs amdfam10_cost = {
1168 COSTS_N_INSNS (1), /* cost of an add instruction */
1169 COSTS_N_INSNS (2), /* cost of a lea instruction */
1170 COSTS_N_INSNS (1), /* variable shift costs */
1171 COSTS_N_INSNS (1), /* constant shift costs */
1172 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1173 COSTS_N_INSNS (4), /* HI */
1174 COSTS_N_INSNS (3), /* SI */
1175 COSTS_N_INSNS (4), /* DI */
1176 COSTS_N_INSNS (5)}, /* other */
1177 0, /* cost of multiply per each bit set */
1178 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1179 COSTS_N_INSNS (35), /* HI */
1180 COSTS_N_INSNS (51), /* SI */
1181 COSTS_N_INSNS (83), /* DI */
1182 COSTS_N_INSNS (83)}, /* other */
1183 COSTS_N_INSNS (1), /* cost of movsx */
1184 COSTS_N_INSNS (1), /* cost of movzx */
1185 8, /* "large" insn */
1186 9, /* MOVE_RATIO */
1187 4, /* cost for loading QImode using movzbl */
1188 {3, 4, 3}, /* cost of loading integer registers
1189 in QImode, HImode and SImode.
1190 Relative to reg-reg move (2). */
1191 {3, 4, 3}, /* cost of storing integer registers */
1192 4, /* cost of reg,reg fld/fst */
1193 {4, 4, 12}, /* cost of loading fp registers
1194 in SFmode, DFmode and XFmode */
1195 {6, 6, 8}, /* cost of storing fp registers
1196 in SFmode, DFmode and XFmode */
1197 2, /* cost of moving MMX register */
1198 {3, 3}, /* cost of loading MMX registers
1199 in SImode and DImode */
1200 {4, 4}, /* cost of storing MMX registers
1201 in SImode and DImode */
1202 2, /* cost of moving SSE register */
1203 {4, 4, 3}, /* cost of loading SSE registers
1204 in SImode, DImode and TImode */
1205 {4, 4, 5}, /* cost of storing SSE registers
1206 in SImode, DImode and TImode */
1207 3, /* MMX or SSE register to integer */
1208 /* On K8:
1209 MOVD reg64, xmmreg Double FSTORE 4
1210 MOVD reg32, xmmreg Double FSTORE 4
1211 On AMDFAM10:
1212 MOVD reg64, xmmreg Double FADD 3
1213 1/1 1/1
1214 MOVD reg32, xmmreg Double FADD 3
1215 1/1 1/1 */
1216 64, /* size of l1 cache. */
1217 512, /* size of l2 cache. */
1218 64, /* size of prefetch block */
1219 /* New AMD processors never drop prefetches; if they cannot be performed
1220 immediately, they are queued. We set number of simultaneous prefetches
1221 to a large constant to reflect this (it probably is not a good idea not
1222 to limit number of prefetches at all, as their execution also takes some
1223 time). */
1224 100, /* number of parallel prefetches */
1225 2, /* Branch cost */
1226 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1227 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1228 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1229 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1230 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1231 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1232
1233 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1234 very small blocks it is better to use loop. For large blocks, libcall can
1235 do non-temporal accesses and beat inline considerably. */
1236 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1237 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1238 {{libcall, {{8, loop}, {24, unrolled_loop},
1239 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1240 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1241 4, /* scalar_stmt_cost. */
1242 2, /* scalar load_cost. */
1243 2, /* scalar_store_cost. */
1244 6, /* vec_stmt_cost. */
1245 0, /* vec_to_scalar_cost. */
1246 2, /* scalar_to_vec_cost. */
1247 2, /* vec_align_load_cost. */
1248 2, /* vec_unalign_load_cost. */
1249 2, /* vec_store_cost. */
1250 2, /* cond_taken_branch_cost. */
1251 1, /* cond_not_taken_branch_cost. */
1252 };
1253
1254 struct processor_costs bdver1_cost = {
1255 COSTS_N_INSNS (1), /* cost of an add instruction */
1256 COSTS_N_INSNS (1), /* cost of a lea instruction */
1257 COSTS_N_INSNS (1), /* variable shift costs */
1258 COSTS_N_INSNS (1), /* constant shift costs */
1259 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1260 COSTS_N_INSNS (4), /* HI */
1261 COSTS_N_INSNS (4), /* SI */
1262 COSTS_N_INSNS (6), /* DI */
1263 COSTS_N_INSNS (6)}, /* other */
1264 0, /* cost of multiply per each bit set */
1265 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1266 COSTS_N_INSNS (35), /* HI */
1267 COSTS_N_INSNS (51), /* SI */
1268 COSTS_N_INSNS (83), /* DI */
1269 COSTS_N_INSNS (83)}, /* other */
1270 COSTS_N_INSNS (1), /* cost of movsx */
1271 COSTS_N_INSNS (1), /* cost of movzx */
1272 8, /* "large" insn */
1273 9, /* MOVE_RATIO */
1274 4, /* cost for loading QImode using movzbl */
1275 {5, 5, 4}, /* cost of loading integer registers
1276 in QImode, HImode and SImode.
1277 Relative to reg-reg move (2). */
1278 {4, 4, 4}, /* cost of storing integer registers */
1279 2, /* cost of reg,reg fld/fst */
1280 {5, 5, 12}, /* cost of loading fp registers
1281 in SFmode, DFmode and XFmode */
1282 {4, 4, 8}, /* cost of storing fp registers
1283 in SFmode, DFmode and XFmode */
1284 2, /* cost of moving MMX register */
1285 {4, 4}, /* cost of loading MMX registers
1286 in SImode and DImode */
1287 {4, 4}, /* cost of storing MMX registers
1288 in SImode and DImode */
1289 2, /* cost of moving SSE register */
1290 {4, 4, 4}, /* cost of loading SSE registers
1291 in SImode, DImode and TImode */
1292 {4, 4, 4}, /* cost of storing SSE registers
1293 in SImode, DImode and TImode */
1294 2, /* MMX or SSE register to integer */
1295 /* On K8:
1296 MOVD reg64, xmmreg Double FSTORE 4
1297 MOVD reg32, xmmreg Double FSTORE 4
1298 On AMDFAM10:
1299 MOVD reg64, xmmreg Double FADD 3
1300 1/1 1/1
1301 MOVD reg32, xmmreg Double FADD 3
1302 1/1 1/1 */
1303 16, /* size of l1 cache. */
1304 2048, /* size of l2 cache. */
1305 64, /* size of prefetch block */
1306 /* New AMD processors never drop prefetches; if they cannot be performed
1307 immediately, they are queued. We set number of simultaneous prefetches
1308 to a large constant to reflect this (it probably is not a good idea not
1309 to limit number of prefetches at all, as their execution also takes some
1310 time). */
1311 100, /* number of parallel prefetches */
1312 2, /* Branch cost */
1313 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1314 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1315 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1316 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1317 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1318 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1319
1320 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
1321 very small blocks it is better to use loop. For large blocks, libcall
1322 can do non-temporal accesses and beat inline considerably. */
1323 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1324 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1325 {{libcall, {{8, loop}, {24, unrolled_loop},
1326 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1327 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1328 6, /* scalar_stmt_cost. */
1329 4, /* scalar load_cost. */
1330 4, /* scalar_store_cost. */
1331 6, /* vec_stmt_cost. */
1332 0, /* vec_to_scalar_cost. */
1333 2, /* scalar_to_vec_cost. */
1334 4, /* vec_align_load_cost. */
1335 4, /* vec_unalign_load_cost. */
1336 4, /* vec_store_cost. */
1337 2, /* cond_taken_branch_cost. */
1338 1, /* cond_not_taken_branch_cost. */
1339 };
1340
1341 struct processor_costs bdver2_cost = {
1342 COSTS_N_INSNS (1), /* cost of an add instruction */
1343 COSTS_N_INSNS (1), /* cost of a lea instruction */
1344 COSTS_N_INSNS (1), /* variable shift costs */
1345 COSTS_N_INSNS (1), /* constant shift costs */
1346 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1347 COSTS_N_INSNS (4), /* HI */
1348 COSTS_N_INSNS (4), /* SI */
1349 COSTS_N_INSNS (6), /* DI */
1350 COSTS_N_INSNS (6)}, /* other */
1351 0, /* cost of multiply per each bit set */
1352 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1353 COSTS_N_INSNS (35), /* HI */
1354 COSTS_N_INSNS (51), /* SI */
1355 COSTS_N_INSNS (83), /* DI */
1356 COSTS_N_INSNS (83)}, /* other */
1357 COSTS_N_INSNS (1), /* cost of movsx */
1358 COSTS_N_INSNS (1), /* cost of movzx */
1359 8, /* "large" insn */
1360 9, /* MOVE_RATIO */
1361 4, /* cost for loading QImode using movzbl */
1362 {5, 5, 4}, /* cost of loading integer registers
1363 in QImode, HImode and SImode.
1364 Relative to reg-reg move (2). */
1365 {4, 4, 4}, /* cost of storing integer registers */
1366 2, /* cost of reg,reg fld/fst */
1367 {5, 5, 12}, /* cost of loading fp registers
1368 in SFmode, DFmode and XFmode */
1369 {4, 4, 8}, /* cost of storing fp registers
1370 in SFmode, DFmode and XFmode */
1371 2, /* cost of moving MMX register */
1372 {4, 4}, /* cost of loading MMX registers
1373 in SImode and DImode */
1374 {4, 4}, /* cost of storing MMX registers
1375 in SImode and DImode */
1376 2, /* cost of moving SSE register */
1377 {4, 4, 4}, /* cost of loading SSE registers
1378 in SImode, DImode and TImode */
1379 {4, 4, 4}, /* cost of storing SSE registers
1380 in SImode, DImode and TImode */
1381 2, /* MMX or SSE register to integer */
1382 /* On K8:
1383 MOVD reg64, xmmreg Double FSTORE 4
1384 MOVD reg32, xmmreg Double FSTORE 4
1385 On AMDFAM10:
1386 MOVD reg64, xmmreg Double FADD 3
1387 1/1 1/1
1388 MOVD reg32, xmmreg Double FADD 3
1389 1/1 1/1 */
1390 16, /* size of l1 cache. */
1391 2048, /* size of l2 cache. */
1392 64, /* size of prefetch block */
1393 /* New AMD processors never drop prefetches; if they cannot be performed
1394 immediately, they are queued. We set number of simultaneous prefetches
1395 to a large constant to reflect this (it probably is not a good idea not
1396 to limit number of prefetches at all, as their execution also takes some
1397 time). */
1398 100, /* number of parallel prefetches */
1399 2, /* Branch cost */
1400 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1401 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1402 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1403 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1404 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1405 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1406
1407 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1408 very small blocks it is better to use loop. For large blocks, libcall
1409 can do non-temporal accesses and beat inline considerably. */
1410 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1411 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1412 {{libcall, {{8, loop}, {24, unrolled_loop},
1413 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1414 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1415 6, /* scalar_stmt_cost. */
1416 4, /* scalar load_cost. */
1417 4, /* scalar_store_cost. */
1418 6, /* vec_stmt_cost. */
1419 0, /* vec_to_scalar_cost. */
1420 2, /* scalar_to_vec_cost. */
1421 4, /* vec_align_load_cost. */
1422 4, /* vec_unalign_load_cost. */
1423 4, /* vec_store_cost. */
1424 2, /* cond_taken_branch_cost. */
1425 1, /* cond_not_taken_branch_cost. */
1426 };
1427
1428 struct processor_costs btver1_cost = {
1429 COSTS_N_INSNS (1), /* cost of an add instruction */
1430 COSTS_N_INSNS (2), /* cost of a lea instruction */
1431 COSTS_N_INSNS (1), /* variable shift costs */
1432 COSTS_N_INSNS (1), /* constant shift costs */
1433 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1434 COSTS_N_INSNS (4), /* HI */
1435 COSTS_N_INSNS (3), /* SI */
1436 COSTS_N_INSNS (4), /* DI */
1437 COSTS_N_INSNS (5)}, /* other */
1438 0, /* cost of multiply per each bit set */
1439 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1440 COSTS_N_INSNS (35), /* HI */
1441 COSTS_N_INSNS (51), /* SI */
1442 COSTS_N_INSNS (83), /* DI */
1443 COSTS_N_INSNS (83)}, /* other */
1444 COSTS_N_INSNS (1), /* cost of movsx */
1445 COSTS_N_INSNS (1), /* cost of movzx */
1446 8, /* "large" insn */
1447 9, /* MOVE_RATIO */
1448 4, /* cost for loading QImode using movzbl */
1449 {3, 4, 3}, /* cost of loading integer registers
1450 in QImode, HImode and SImode.
1451 Relative to reg-reg move (2). */
1452 {3, 4, 3}, /* cost of storing integer registers */
1453 4, /* cost of reg,reg fld/fst */
1454 {4, 4, 12}, /* cost of loading fp registers
1455 in SFmode, DFmode and XFmode */
1456 {6, 6, 8}, /* cost of storing fp registers
1457 in SFmode, DFmode and XFmode */
1458 2, /* cost of moving MMX register */
1459 {3, 3}, /* cost of loading MMX registers
1460 in SImode and DImode */
1461 {4, 4}, /* cost of storing MMX registers
1462 in SImode and DImode */
1463 2, /* cost of moving SSE register */
1464 {4, 4, 3}, /* cost of loading SSE registers
1465 in SImode, DImode and TImode */
1466 {4, 4, 5}, /* cost of storing SSE registers
1467 in SImode, DImode and TImode */
1468 3, /* MMX or SSE register to integer */
1469 /* On K8:
1470 MOVD reg64, xmmreg Double FSTORE 4
1471 MOVD reg32, xmmreg Double FSTORE 4
1472 On AMDFAM10:
1473 MOVD reg64, xmmreg Double FADD 3
1474 1/1 1/1
1475 MOVD reg32, xmmreg Double FADD 3
1476 1/1 1/1 */
1477 32, /* size of l1 cache. */
1478 512, /* size of l2 cache. */
1479 64, /* size of prefetch block */
1480 100, /* number of parallel prefetches */
1481 2, /* Branch cost */
1482 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1483 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1484 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1485 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1486 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1487 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1488
1489 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1490 very small blocks it is better to use loop. For large blocks, libcall can
1491 do non-temporal accesses and beat inline considerably. */
1492 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1493 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1494 {{libcall, {{8, loop}, {24, unrolled_loop},
1495 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1496 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1497 4, /* scalar_stmt_cost. */
1498 2, /* scalar load_cost. */
1499 2, /* scalar_store_cost. */
1500 6, /* vec_stmt_cost. */
1501 0, /* vec_to_scalar_cost. */
1502 2, /* scalar_to_vec_cost. */
1503 2, /* vec_align_load_cost. */
1504 2, /* vec_unalign_load_cost. */
1505 2, /* vec_store_cost. */
1506 2, /* cond_taken_branch_cost. */
1507 1, /* cond_not_taken_branch_cost. */
1508 };
1509
1510 static const
1511 struct processor_costs pentium4_cost = {
1512 COSTS_N_INSNS (1), /* cost of an add instruction */
1513 COSTS_N_INSNS (3), /* cost of a lea instruction */
1514 COSTS_N_INSNS (4), /* variable shift costs */
1515 COSTS_N_INSNS (4), /* constant shift costs */
1516 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1517 COSTS_N_INSNS (15), /* HI */
1518 COSTS_N_INSNS (15), /* SI */
1519 COSTS_N_INSNS (15), /* DI */
1520 COSTS_N_INSNS (15)}, /* other */
1521 0, /* cost of multiply per each bit set */
1522 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1523 COSTS_N_INSNS (56), /* HI */
1524 COSTS_N_INSNS (56), /* SI */
1525 COSTS_N_INSNS (56), /* DI */
1526 COSTS_N_INSNS (56)}, /* other */
1527 COSTS_N_INSNS (1), /* cost of movsx */
1528 COSTS_N_INSNS (1), /* cost of movzx */
1529 16, /* "large" insn */
1530 6, /* MOVE_RATIO */
1531 2, /* cost for loading QImode using movzbl */
1532 {4, 5, 4}, /* cost of loading integer registers
1533 in QImode, HImode and SImode.
1534 Relative to reg-reg move (2). */
1535 {2, 3, 2}, /* cost of storing integer registers */
1536 2, /* cost of reg,reg fld/fst */
1537 {2, 2, 6}, /* cost of loading fp registers
1538 in SFmode, DFmode and XFmode */
1539 {4, 4, 6}, /* cost of storing fp registers
1540 in SFmode, DFmode and XFmode */
1541 2, /* cost of moving MMX register */
1542 {2, 2}, /* cost of loading MMX registers
1543 in SImode and DImode */
1544 {2, 2}, /* cost of storing MMX registers
1545 in SImode and DImode */
1546 12, /* cost of moving SSE register */
1547 {12, 12, 12}, /* cost of loading SSE registers
1548 in SImode, DImode and TImode */
1549 {2, 2, 8}, /* cost of storing SSE registers
1550 in SImode, DImode and TImode */
1551 10, /* MMX or SSE register to integer */
1552 8, /* size of l1 cache. */
1553 256, /* size of l2 cache. */
1554 64, /* size of prefetch block */
1555 6, /* number of parallel prefetches */
1556 2, /* Branch cost */
1557 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1558 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1559 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1560 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1561 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1562 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1563 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1564 DUMMY_STRINGOP_ALGS},
1565 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1566 {-1, libcall}}},
1567 DUMMY_STRINGOP_ALGS},
1568 1, /* scalar_stmt_cost. */
1569 1, /* scalar load_cost. */
1570 1, /* scalar_store_cost. */
1571 1, /* vec_stmt_cost. */
1572 1, /* vec_to_scalar_cost. */
1573 1, /* scalar_to_vec_cost. */
1574 1, /* vec_align_load_cost. */
1575 2, /* vec_unalign_load_cost. */
1576 1, /* vec_store_cost. */
1577 3, /* cond_taken_branch_cost. */
1578 1, /* cond_not_taken_branch_cost. */
1579 };
1580
1581 static const
1582 struct processor_costs nocona_cost = {
1583 COSTS_N_INSNS (1), /* cost of an add instruction */
1584 COSTS_N_INSNS (1), /* cost of a lea instruction */
1585 COSTS_N_INSNS (1), /* variable shift costs */
1586 COSTS_N_INSNS (1), /* constant shift costs */
1587 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1588 COSTS_N_INSNS (10), /* HI */
1589 COSTS_N_INSNS (10), /* SI */
1590 COSTS_N_INSNS (10), /* DI */
1591 COSTS_N_INSNS (10)}, /* other */
1592 0, /* cost of multiply per each bit set */
1593 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1594 COSTS_N_INSNS (66), /* HI */
1595 COSTS_N_INSNS (66), /* SI */
1596 COSTS_N_INSNS (66), /* DI */
1597 COSTS_N_INSNS (66)}, /* other */
1598 COSTS_N_INSNS (1), /* cost of movsx */
1599 COSTS_N_INSNS (1), /* cost of movzx */
1600 16, /* "large" insn */
1601 17, /* MOVE_RATIO */
1602 4, /* cost for loading QImode using movzbl */
1603 {4, 4, 4}, /* cost of loading integer registers
1604 in QImode, HImode and SImode.
1605 Relative to reg-reg move (2). */
1606 {4, 4, 4}, /* cost of storing integer registers */
1607 3, /* cost of reg,reg fld/fst */
1608 {12, 12, 12}, /* cost of loading fp registers
1609 in SFmode, DFmode and XFmode */
1610 {4, 4, 4}, /* cost of storing fp registers
1611 in SFmode, DFmode and XFmode */
1612 6, /* cost of moving MMX register */
1613 {12, 12}, /* cost of loading MMX registers
1614 in SImode and DImode */
1615 {12, 12}, /* cost of storing MMX registers
1616 in SImode and DImode */
1617 6, /* cost of moving SSE register */
1618 {12, 12, 12}, /* cost of loading SSE registers
1619 in SImode, DImode and TImode */
1620 {12, 12, 12}, /* cost of storing SSE registers
1621 in SImode, DImode and TImode */
1622 8, /* MMX or SSE register to integer */
1623 8, /* size of l1 cache. */
1624 1024, /* size of l2 cache. */
1625 128, /* size of prefetch block */
1626 8, /* number of parallel prefetches */
1627 1, /* Branch cost */
1628 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1629 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1630 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1631 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1632 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1633 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1634 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1635 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1636 {100000, unrolled_loop}, {-1, libcall}}}},
1637 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1638 {-1, libcall}}},
1639 {libcall, {{24, loop}, {64, unrolled_loop},
1640 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1641 1, /* scalar_stmt_cost. */
1642 1, /* scalar load_cost. */
1643 1, /* scalar_store_cost. */
1644 1, /* vec_stmt_cost. */
1645 1, /* vec_to_scalar_cost. */
1646 1, /* scalar_to_vec_cost. */
1647 1, /* vec_align_load_cost. */
1648 2, /* vec_unalign_load_cost. */
1649 1, /* vec_store_cost. */
1650 3, /* cond_taken_branch_cost. */
1651 1, /* cond_not_taken_branch_cost. */
1652 };
1653
1654 static const
1655 struct processor_costs atom_cost = {
1656 COSTS_N_INSNS (1), /* cost of an add instruction */
1657 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1658 COSTS_N_INSNS (1), /* variable shift costs */
1659 COSTS_N_INSNS (1), /* constant shift costs */
1660 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1661 COSTS_N_INSNS (4), /* HI */
1662 COSTS_N_INSNS (3), /* SI */
1663 COSTS_N_INSNS (4), /* DI */
1664 COSTS_N_INSNS (2)}, /* other */
1665 0, /* cost of multiply per each bit set */
1666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1667 COSTS_N_INSNS (26), /* HI */
1668 COSTS_N_INSNS (42), /* SI */
1669 COSTS_N_INSNS (74), /* DI */
1670 COSTS_N_INSNS (74)}, /* other */
1671 COSTS_N_INSNS (1), /* cost of movsx */
1672 COSTS_N_INSNS (1), /* cost of movzx */
1673 8, /* "large" insn */
1674 17, /* MOVE_RATIO */
1675 2, /* cost for loading QImode using movzbl */
1676 {4, 4, 4}, /* cost of loading integer registers
1677 in QImode, HImode and SImode.
1678 Relative to reg-reg move (2). */
1679 {4, 4, 4}, /* cost of storing integer registers */
1680 4, /* cost of reg,reg fld/fst */
1681 {12, 12, 12}, /* cost of loading fp registers
1682 in SFmode, DFmode and XFmode */
1683 {6, 6, 8}, /* cost of storing fp registers
1684 in SFmode, DFmode and XFmode */
1685 2, /* cost of moving MMX register */
1686 {8, 8}, /* cost of loading MMX registers
1687 in SImode and DImode */
1688 {8, 8}, /* cost of storing MMX registers
1689 in SImode and DImode */
1690 2, /* cost of moving SSE register */
1691 {8, 8, 8}, /* cost of loading SSE registers
1692 in SImode, DImode and TImode */
1693 {8, 8, 8}, /* cost of storing SSE registers
1694 in SImode, DImode and TImode */
1695 5, /* MMX or SSE register to integer */
1696 32, /* size of l1 cache. */
1697 256, /* size of l2 cache. */
1698 64, /* size of prefetch block */
1699 6, /* number of parallel prefetches */
1700 3, /* Branch cost */
1701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1702 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1703 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1704 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1705 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1706 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1707 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1708 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1709 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1710 {{libcall, {{8, loop}, {15, unrolled_loop},
1711 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1712 {libcall, {{24, loop}, {32, unrolled_loop},
1713 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1714 1, /* scalar_stmt_cost. */
1715 1, /* scalar load_cost. */
1716 1, /* scalar_store_cost. */
1717 1, /* vec_stmt_cost. */
1718 1, /* vec_to_scalar_cost. */
1719 1, /* scalar_to_vec_cost. */
1720 1, /* vec_align_load_cost. */
1721 2, /* vec_unalign_load_cost. */
1722 1, /* vec_store_cost. */
1723 3, /* cond_taken_branch_cost. */
1724 1, /* cond_not_taken_branch_cost. */
1725 };
1726
1727 /* Generic64 should produce code tuned for Nocona and K8. */
1728 static const
1729 struct processor_costs generic64_cost = {
1730 COSTS_N_INSNS (1), /* cost of an add instruction */
1731 /* On all chips taken into consideration lea is 2 cycles or more. With
1732 this cost, however, our current implementation of synth_mult results in
1733 the use of unnecessary temporary registers, causing regressions on several
1734 SPECfp benchmarks. */
1735 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1736 COSTS_N_INSNS (1), /* variable shift costs */
1737 COSTS_N_INSNS (1), /* constant shift costs */
1738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1739 COSTS_N_INSNS (4), /* HI */
1740 COSTS_N_INSNS (3), /* SI */
1741 COSTS_N_INSNS (4), /* DI */
1742 COSTS_N_INSNS (2)}, /* other */
1743 0, /* cost of multiply per each bit set */
1744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1745 COSTS_N_INSNS (26), /* HI */
1746 COSTS_N_INSNS (42), /* SI */
1747 COSTS_N_INSNS (74), /* DI */
1748 COSTS_N_INSNS (74)}, /* other */
1749 COSTS_N_INSNS (1), /* cost of movsx */
1750 COSTS_N_INSNS (1), /* cost of movzx */
1751 8, /* "large" insn */
1752 17, /* MOVE_RATIO */
1753 4, /* cost for loading QImode using movzbl */
1754 {4, 4, 4}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {4, 4, 4}, /* cost of storing integer registers */
1758 4, /* cost of reg,reg fld/fst */
1759 {12, 12, 12}, /* cost of loading fp registers
1760 in SFmode, DFmode and XFmode */
1761 {6, 6, 8}, /* cost of storing fp registers
1762 in SFmode, DFmode and XFmode */
1763 2, /* cost of moving MMX register */
1764 {8, 8}, /* cost of loading MMX registers
1765 in SImode and DImode */
1766 {8, 8}, /* cost of storing MMX registers
1767 in SImode and DImode */
1768 2, /* cost of moving SSE register */
1769 {8, 8, 8}, /* cost of loading SSE registers
1770 in SImode, DImode and TImode */
1771 {8, 8, 8}, /* cost of storing SSE registers
1772 in SImode, DImode and TImode */
1773 5, /* MMX or SSE register to integer */
1774 32, /* size of l1 cache. */
1775 512, /* size of l2 cache. */
1776 64, /* size of prefetch block */
1777 6, /* number of parallel prefetches */
1778 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1779 value is increased to the perhaps more appropriate value of 5. */
1780 3, /* Branch cost */
1781 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1782 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1783 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1784 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1785 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1786 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1787 {DUMMY_STRINGOP_ALGS,
1788 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1789 {DUMMY_STRINGOP_ALGS,
1790 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1791 1, /* scalar_stmt_cost. */
1792 1, /* scalar load_cost. */
1793 1, /* scalar_store_cost. */
1794 1, /* vec_stmt_cost. */
1795 1, /* vec_to_scalar_cost. */
1796 1, /* scalar_to_vec_cost. */
1797 1, /* vec_align_load_cost. */
1798 2, /* vec_unalign_load_cost. */
1799 1, /* vec_store_cost. */
1800 3, /* cond_taken_branch_cost. */
1801 1, /* cond_not_taken_branch_cost. */
1802 };
1803
1804 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1805 Athlon and K8. */
1806 static const
1807 struct processor_costs generic32_cost = {
1808 COSTS_N_INSNS (1), /* cost of an add instruction */
1809 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1810 COSTS_N_INSNS (1), /* variable shift costs */
1811 COSTS_N_INSNS (1), /* constant shift costs */
1812 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1813 COSTS_N_INSNS (4), /* HI */
1814 COSTS_N_INSNS (3), /* SI */
1815 COSTS_N_INSNS (4), /* DI */
1816 COSTS_N_INSNS (2)}, /* other */
1817 0, /* cost of multiply per each bit set */
1818 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1819 COSTS_N_INSNS (26), /* HI */
1820 COSTS_N_INSNS (42), /* SI */
1821 COSTS_N_INSNS (74), /* DI */
1822 COSTS_N_INSNS (74)}, /* other */
1823 COSTS_N_INSNS (1), /* cost of movsx */
1824 COSTS_N_INSNS (1), /* cost of movzx */
1825 8, /* "large" insn */
1826 17, /* MOVE_RATIO */
1827 4, /* cost for loading QImode using movzbl */
1828 {4, 4, 4}, /* cost of loading integer registers
1829 in QImode, HImode and SImode.
1830 Relative to reg-reg move (2). */
1831 {4, 4, 4}, /* cost of storing integer registers */
1832 4, /* cost of reg,reg fld/fst */
1833 {12, 12, 12}, /* cost of loading fp registers
1834 in SFmode, DFmode and XFmode */
1835 {6, 6, 8}, /* cost of storing fp registers
1836 in SFmode, DFmode and XFmode */
1837 2, /* cost of moving MMX register */
1838 {8, 8}, /* cost of loading MMX registers
1839 in SImode and DImode */
1840 {8, 8}, /* cost of storing MMX registers
1841 in SImode and DImode */
1842 2, /* cost of moving SSE register */
1843 {8, 8, 8}, /* cost of loading SSE registers
1844 in SImode, DImode and TImode */
1845 {8, 8, 8}, /* cost of storing SSE registers
1846 in SImode, DImode and TImode */
1847 5, /* MMX or SSE register to integer */
1848 32, /* size of l1 cache. */
1849 256, /* size of l2 cache. */
1850 64, /* size of prefetch block */
1851 6, /* number of parallel prefetches */
1852 3, /* Branch cost */
1853 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1854 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1855 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1856 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1857 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1858 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1859 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1860 DUMMY_STRINGOP_ALGS},
1861 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1862 DUMMY_STRINGOP_ALGS},
1863 1, /* scalar_stmt_cost. */
1864 1, /* scalar load_cost. */
1865 1, /* scalar_store_cost. */
1866 1, /* vec_stmt_cost. */
1867 1, /* vec_to_scalar_cost. */
1868 1, /* scalar_to_vec_cost. */
1869 1, /* vec_align_load_cost. */
1870 2, /* vec_unalign_load_cost. */
1871 1, /* vec_store_cost. */
1872 3, /* cond_taken_branch_cost. */
1873 1, /* cond_not_taken_branch_cost. */
1874 };
1875
1876 const struct processor_costs *ix86_cost = &pentium_cost;
1877
1878 /* Processor feature/optimization bitmasks. */
1879 #define m_386 (1<<PROCESSOR_I386)
1880 #define m_486 (1<<PROCESSOR_I486)
1881 #define m_PENT (1<<PROCESSOR_PENTIUM)
1882 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1883 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1884 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1885 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1886 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1887 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1888 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1889 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1890 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1891 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1892 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1893 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1894 #define m_ATOM (1<<PROCESSOR_ATOM)
1895
1896 #define m_GEODE (1<<PROCESSOR_GEODE)
1897 #define m_K6 (1<<PROCESSOR_K6)
1898 #define m_K6_GEODE (m_K6 | m_GEODE)
1899 #define m_K8 (1<<PROCESSOR_K8)
1900 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1903 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1904 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1905 #define m_BDVER (m_BDVER1 | m_BDVER2)
1906 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1907 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1908
1909 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1910 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1911
1912 /* Generic instruction choice should be a common subset of the supported CPUs
1913 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1914 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1915
1916 /* Feature tests against the various tunings. */
1917 unsigned char ix86_tune_features[X86_TUNE_LAST];
1918
1919 /* Feature tests against the various tunings used to create ix86_tune_features
1920 based on the processor mask. */
1921 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1922 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1923 negatively, so enabling it for Generic64 seems like a good code-size
1924 tradeoff. We can't enable it for 32bit generic because it does not
1925 work well with PPro-based chips. */
1926 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1927
1928 /* X86_TUNE_PUSH_MEMORY */
1929 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1930
1931 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1932 m_486 | m_PENT,
1933
1934 /* X86_TUNE_UNROLL_STRLEN */
1935 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1936
1937 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1938 on simulation results. But after P4 was made, no performance benefit
1939 was observed from branch hints; they also increase code size.
1940 As a result, icc never generates branch hints. */
1941 0,
1942
1943 /* X86_TUNE_DOUBLE_WITH_ADD */
1944 ~m_386,
1945
1946 /* X86_TUNE_USE_SAHF */
1947 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1948
1949 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1950 partial dependencies. */
1951 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1952
1953 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1954 register stalls on the Generic32 compilation setting as well. However,
1955 in the current implementation partial register stalls are not eliminated
1956 very well - they can be introduced via subregs synthesized by combine
1957 and can happen in caller/callee saving sequences. Because this option
1958 pays back little on PPro-based chips and conflicts with the partial reg
1959 dependencies used by Athlon/P4-based chips, it is better to leave it off
1960 for generic32 for now. */
1961 m_PPRO,
1962
1963 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1964 m_CORE2I7 | m_GENERIC,
1965
1966 /* X86_TUNE_USE_HIMODE_FIOP */
1967 m_386 | m_486 | m_K6_GEODE,
1968
1969 /* X86_TUNE_USE_SIMODE_FIOP */
1970 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1971
1972 /* X86_TUNE_USE_MOV0 */
1973 m_K6,
1974
1975 /* X86_TUNE_USE_CLTD */
1976 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1977
1978 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1979 m_PENT4,
1980
1981 /* X86_TUNE_SPLIT_LONG_MOVES */
1982 m_PPRO,
1983
1984 /* X86_TUNE_READ_MODIFY_WRITE */
1985 ~m_PENT,
1986
1987 /* X86_TUNE_READ_MODIFY */
1988 ~(m_PENT | m_PPRO),
1989
1990 /* X86_TUNE_PROMOTE_QIMODE */
1991 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1992
1993 /* X86_TUNE_FAST_PREFIX */
1994 ~(m_386 | m_486 | m_PENT),
1995
1996 /* X86_TUNE_SINGLE_STRINGOP */
1997 m_386 | m_P4_NOCONA,
1998
1999 /* X86_TUNE_QIMODE_MATH */
2000 ~0,
2001
2002 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2003 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2004 might be considered for Generic32 if our scheme for avoiding partial
2005 stalls was more effective. */
2006 ~m_PPRO,
2007
2008 /* X86_TUNE_PROMOTE_QI_REGS */
2009 0,
2010
2011 /* X86_TUNE_PROMOTE_HI_REGS */
2012 m_PPRO,
2013
2014 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2015 over esp addition. */
2016 m_386 | m_486 | m_PENT | m_PPRO,
2017
2018 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2019 over esp addition. */
2020 m_PENT,
2021
2022 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2023 over esp subtraction. */
2024 m_386 | m_486 | m_PENT | m_K6_GEODE,
2025
2026 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2027 over esp subtraction. */
2028 m_PENT | m_K6_GEODE,
2029
2030 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2031 for DFmode copies. */
2032 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2033
2034 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2035 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2036
2037 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2038 conflict here between PPro/Pentium4-based chips that treat 128bit
2039 SSE registers as single units and K8-based chips that divide SSE
2040 registers into two 64bit halves. This knob promotes all store destinations
2041 to be 128bit to allow register renaming on 128bit SSE units, but usually
2042 results in one extra micro-op on 64bit SSE units. Experimental results
2043 show that disabling this option on P4 brings over a 20% SPECfp regression,
2044 while enabling it on K8 brings roughly a 2.4% regression that can be partly
2045 masked by careful scheduling of moves. */
2046 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2047
2048 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2049 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2050
2051 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2052 m_COREI7 | m_BDVER,
2053
2054 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2055 m_BDVER,
2056
2057 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2058 are resolved on SSE register parts instead of whole registers, so we may
2059 maintain just the lower part of scalar values in the proper format, leaving
2060 the upper part undefined. */
2061 m_ATHLON_K8,
2062
2063 /* X86_TUNE_SSE_TYPELESS_STORES */
2064 m_AMD_MULTIPLE,
2065
2066 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2067 m_PPRO | m_P4_NOCONA,
2068
2069 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2070 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2071
2072 /* X86_TUNE_PROLOGUE_USING_MOVE */
2073 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2074
2075 /* X86_TUNE_EPILOGUE_USING_MOVE */
2076 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2077
2078 /* X86_TUNE_SHIFT1 */
2079 ~m_486,
2080
2081 /* X86_TUNE_USE_FFREEP */
2082 m_AMD_MULTIPLE,
2083
2084 /* X86_TUNE_INTER_UNIT_MOVES */
2085 ~(m_AMD_MULTIPLE | m_GENERIC),
2086
2087 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2088 ~(m_AMDFAM10 | m_BDVER),
2089
2090 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2091 than 4 branch instructions in the 16 byte window. */
2092 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2093
2094 /* X86_TUNE_SCHEDULE */
2095 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2096
2097 /* X86_TUNE_USE_BT */
2098 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2099
2100 /* X86_TUNE_USE_INCDEC */
2101 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2102
2103 /* X86_TUNE_PAD_RETURNS */
2104 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2105
2106 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2107 m_ATOM,
2108
2109 /* X86_TUNE_EXT_80387_CONSTANTS */
2110 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2111
2112 /* X86_TUNE_SHORTEN_X87_SSE */
2113 ~m_K8,
2114
2115 /* X86_TUNE_AVOID_VECTOR_DECODE */
2116 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2117
2118 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2119 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
2120 ~(m_386 | m_486),
2121
2122 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2123 vector path on AMD machines. */
2124 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2125
2126 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2127 machines. */
2128 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2129
2130 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2131 than a MOV. */
2132 m_PENT,
2133
2134 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2135 but one byte longer. */
2136 m_PENT,
2137
2138 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2139 operand that cannot be represented using a modRM byte. The XOR
2140 replacement is long decoded, so this split helps here as well. */
2141 m_K6,
2142
2143 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2144 from FP to FP. */
2145 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2146
2147 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2148 from integer to FP. */
2149 m_AMDFAM10,
2150
2151 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2152 with a subsequent conditional jump instruction into a single
2153 compare-and-branch uop. */
2154 m_BDVER,
2155
2156 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2157 will impact LEA instruction selection. */
2158 m_ATOM,
2159
2160 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2161 instructions. */
2162 ~m_ATOM,
2163
2164 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2165 at -O3. For the moment, the prefetching seems badly tuned for Intel
2166 chips. */
2167 m_K6_GEODE | m_AMD_MULTIPLE,
2168
2169 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2170 the auto-vectorizer. */
2171 m_BDVER,
2172
2173 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2174 during reassociation of integer computation. */
2175 m_ATOM,
2176
2177 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2178 during reassociation of fp computation. */
2179 m_ATOM
2180 };
2181
2182 /* Feature tests against the various architecture variations. */
2183 unsigned char ix86_arch_features[X86_ARCH_LAST];
2184
2185 /* Feature tests against the various architecture variations, used to create
2186 ix86_arch_features based on the processor mask. */
2187 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2188 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2189 ~(m_386 | m_486 | m_PENT | m_K6),
2190
2191 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2192 ~m_386,
2193
2194 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2195 ~(m_386 | m_486),
2196
2197 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2198 ~m_386,
2199
2200 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2201 ~m_386,
2202 };
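/* A sketch of how the two tables above are consumed (assumed from the
   option-override code later in this file, not quoted verbatim): a one-bit
   mask is formed from the selected -mtune / -march processor and ANDed
   with every entry, roughly

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   and likewise ix86_arch_features is filled from
   initial_ix86_arch_features using the -march processor mask.  */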
2203
2204 static const unsigned int x86_accumulate_outgoing_args
2205 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2206
2207 static const unsigned int x86_arch_always_fancy_math_387
2208 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2209
2210 static const unsigned int x86_avx256_split_unaligned_load
2211 = m_COREI7 | m_GENERIC;
2212
2213 static const unsigned int x86_avx256_split_unaligned_store
2214 = m_COREI7 | m_BDVER | m_GENERIC;
2215
2216 /* If the average insn count for a single function invocation is
2217 lower than this constant, emit fast (but longer) prologue and
2218 epilogue code. */
2219 #define FAST_PROLOGUE_INSN_COUNT 20
2220
2221 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2222 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2223 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2224 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2225
2226 /* Array of the smallest class containing reg number REGNO, indexed by
2227 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2228
2229 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2230 {
2231 /* ax, dx, cx, bx */
2232 AREG, DREG, CREG, BREG,
2233 /* si, di, bp, sp */
2234 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2235 /* FP registers */
2236 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2237 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2238 /* arg pointer */
2239 NON_Q_REGS,
2240 /* flags, fpsr, fpcr, frame */
2241 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2242 /* SSE registers */
2243 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2244 SSE_REGS, SSE_REGS,
2245 /* MMX registers */
2246 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2247 MMX_REGS, MMX_REGS,
2248 /* REX registers */
2249 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2250 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2251 /* SSE REX registers */
2252 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2253 SSE_REGS, SSE_REGS,
2254 };
2255
2256 /* The "default" register map used in 32bit mode. */
2257
2258 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259 {
2260 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2261 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2262 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2263 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2264 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2267 };
2268
2269 /* The "default" register map used in 64bit mode. */
2270
2271 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2272 {
2273 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2274 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2275 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2276 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2277 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2278 8,9,10,11,12,13,14,15, /* extended integer registers */
2279 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2280 };
2281
2282 /* Define the register numbers to be used in Dwarf debugging information.
2283 The SVR4 reference port C compiler uses the following register numbers
2284 in its Dwarf output code:
2285 0 for %eax (gcc regno = 0)
2286 1 for %ecx (gcc regno = 2)
2287 2 for %edx (gcc regno = 1)
2288 3 for %ebx (gcc regno = 3)
2289 4 for %esp (gcc regno = 7)
2290 5 for %ebp (gcc regno = 6)
2291 6 for %esi (gcc regno = 4)
2292 7 for %edi (gcc regno = 5)
2293 The following three DWARF register numbers are never generated by
2294 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2295 believes these numbers have these meanings.
2296 8 for %eip (no gcc equivalent)
2297 9 for %eflags (gcc regno = 17)
2298 10 for %trapno (no gcc equivalent)
2299 It is not at all clear how we should number the FP stack registers
2300 for the x86 architecture. If the version of SDB on x86/svr4 were
2301 a bit less brain dead with respect to floating-point then we would
2302 have a precedent to follow with respect to DWARF register numbers
2303 for x86 FP registers, but the SDB on x86/svr4 is so completely
2304 broken with respect to FP registers that it is hardly worth thinking
2305 of it as something to strive for compatibility with.
2306 The version of x86/svr4 SDB I have at the moment does (partially)
2307 seem to believe that DWARF register number 11 is associated with
2308 the x86 register %st(0), but that's about all. Higher DWARF
2309 register numbers don't seem to be associated with anything in
2310 particular, and even for DWARF regno 11, SDB only seems to under-
2311 stand that it should say that a variable lives in %st(0) (when
2312 asked via an `=' command) if we said it was in DWARF regno 11,
2313 but SDB still prints garbage when asked for the value of the
2314 variable in question (via a `/' command).
2315 (Also note that the labels SDB prints for various FP stack regs
2316 when doing an `x' command are all wrong.)
2317 Note that these problems generally don't affect the native SVR4
2318 C compiler because it doesn't allow the use of -O with -g and
2319 because when it is *not* optimizing, it allocates a memory
2320 location for each floating-point variable, and the memory
2321 location is what gets described in the DWARF AT_location
2322 attribute for the variable in question.
2323 Regardless of the severe mental illness of the x86/svr4 SDB, we
2324 do something sensible here and we use the following DWARF
2325 register numbers. Note that these are all stack-top-relative
2326 numbers.
2327 11 for %st(0) (gcc regno = 8)
2328 12 for %st(1) (gcc regno = 9)
2329 13 for %st(2) (gcc regno = 10)
2330 14 for %st(3) (gcc regno = 11)
2331 15 for %st(4) (gcc regno = 12)
2332 16 for %st(5) (gcc regno = 13)
2333 17 for %st(6) (gcc regno = 14)
2334 18 for %st(7) (gcc regno = 15)
2335 */
2336 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2337 {
2338 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2339 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2340 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2341 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2342 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2343 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2344 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2345 };
2346
2347 /* Define parameter passing and return registers. */
2348
2349 static int const x86_64_int_parameter_registers[6] =
2350 {
2351 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2352 };
2353
2354 static int const x86_64_ms_abi_int_parameter_registers[4] =
2355 {
2356 CX_REG, DX_REG, R8_REG, R9_REG
2357 };
2358
2359 static int const x86_64_int_return_registers[4] =
2360 {
2361 AX_REG, DX_REG, DI_REG, SI_REG
2362 };
2363
2364 /* Define the structure for the machine field in struct function. */
2365
2366 struct GTY(()) stack_local_entry {
2367 unsigned short mode;
2368 unsigned short n;
2369 rtx rtl;
2370 struct stack_local_entry *next;
2371 };
2372
2373 /* Structure describing stack frame layout.
2374 Stack grows downward:
2375
2376 [arguments]
2377 <- ARG_POINTER
2378 saved pc
2379
2380 saved static chain if ix86_static_chain_on_stack
2381
2382 saved frame pointer if frame_pointer_needed
2383 <- HARD_FRAME_POINTER
2384 [saved regs]
2385 <- regs_save_offset
2386 [padding0]
2387
2388 [saved SSE regs]
2389 <- sse_regs_save_offset
2390 [padding1] |
2391 | <- FRAME_POINTER
2392 [va_arg registers] |
2393 |
2394 [frame] |
2395 |
2396 [padding2] | = to_allocate
2397 <- STACK_POINTER
2398 */
2399 struct ix86_frame
2400 {
2401 int nsseregs;
2402 int nregs;
2403 int va_arg_size;
2404 int red_zone_size;
2405 int outgoing_arguments_size;
2406 HOST_WIDE_INT frame;
2407
2408 /* The offsets relative to ARG_POINTER. */
2409 HOST_WIDE_INT frame_pointer_offset;
2410 HOST_WIDE_INT hard_frame_pointer_offset;
2411 HOST_WIDE_INT stack_pointer_offset;
2412 HOST_WIDE_INT hfp_save_offset;
2413 HOST_WIDE_INT reg_save_offset;
2414 HOST_WIDE_INT sse_reg_save_offset;
2415
2416 /* When save_regs_using_mov is set, emit prologue using
2417 move instead of push instructions. */
2418 bool save_regs_using_mov;
2419 };
2420
2421 /* Which cpu are we scheduling for. */
2422 enum attr_cpu ix86_schedule;
2423
2424 /* Which cpu are we optimizing for. */
2425 enum processor_type ix86_tune;
2426
2427 /* Which instruction set architecture to use. */
2428 enum processor_type ix86_arch;
2429
2430 /* true if sse prefetch instruction is not NOOP. */
2431 int x86_prefetch_sse;
2432
2433 /* -mstackrealign option */
2434 static const char ix86_force_align_arg_pointer_string[]
2435 = "force_align_arg_pointer";
2436
2437 static rtx (*ix86_gen_leave) (void);
2438 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2439 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2440 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2441 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2442 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2443 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2444 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2445 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2446 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2447
2448 /* Preferred alignment for stack boundary in bits. */
2449 unsigned int ix86_preferred_stack_boundary;
2450
2451 /* Alignment for incoming stack boundary in bits specified at
2452 command line. */
2453 static unsigned int ix86_user_incoming_stack_boundary;
2454
2455 /* Default alignment for incoming stack boundary in bits. */
2456 static unsigned int ix86_default_incoming_stack_boundary;
2457
2458 /* Alignment for incoming stack boundary in bits. */
2459 unsigned int ix86_incoming_stack_boundary;
2460
2461 /* Calling abi specific va_list type nodes. */
2462 static GTY(()) tree sysv_va_list_type_node;
2463 static GTY(()) tree ms_va_list_type_node;
2464
2465 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2466 char internal_label_prefix[16];
2467 int internal_label_prefix_len;
2468
2469 /* Fence to use after loop using movnt. */
2470 tree x86_mfence;
2471
2472 /* Register class used for passing a given 64bit part of the argument.
2473 These represent classes as documented by the psABI, with the exception of
2474 the SSESF and SSEDF classes, which are basically the SSE class, except that
2475 gcc will use SFmode or DFmode moves instead of DImode to avoid
2476 reformatting penalties.
2477 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2478 whenever possible (i.e., when the upper half contains only padding). */
2479 enum x86_64_reg_class
2480 {
2481 X86_64_NO_CLASS,
2482 X86_64_INTEGER_CLASS,
2483 X86_64_INTEGERSI_CLASS,
2484 X86_64_SSE_CLASS,
2485 X86_64_SSESF_CLASS,
2486 X86_64_SSEDF_CLASS,
2487 X86_64_SSEUP_CLASS,
2488 X86_64_X87_CLASS,
2489 X86_64_X87UP_CLASS,
2490 X86_64_COMPLEX_X87_CLASS,
2491 X86_64_MEMORY_CLASS
2492 };
2493
2494 #define MAX_CLASSES 4
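/* A worked example of this classification (an illustration following the
   x86-64 psABI, not code from this file): an argument of type

     struct { double d; int i; }

   spans two eightbytes.  The first eightbyte holds a lone double and would
   be classified as X86_64_SSEDF_CLASS (moved in DFmode), the second holds
   an int plus padding and would be X86_64_INTEGERSI_CLASS (moved in
   SImode), so the whole struct is passed in one SSE register and one
   integer register.  */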
2495
2496 /* Table of constants used by fldpi, fldln2, etc.... */
2497 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2498 static bool ext_80387_constants_init = 0;
2499
2500 \f
2501 static struct machine_function * ix86_init_machine_status (void);
2502 static rtx ix86_function_value (const_tree, const_tree, bool);
2503 static bool ix86_function_value_regno_p (const unsigned int);
2504 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2505 const_tree);
2506 static rtx ix86_static_chain (const_tree, bool);
2507 static int ix86_function_regparm (const_tree, const_tree);
2508 static void ix86_compute_frame_layout (struct ix86_frame *);
2509 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2510 rtx, rtx, int);
2511 static void ix86_add_new_builtins (HOST_WIDE_INT);
2512 static rtx ix86_expand_vec_perm_builtin (tree);
2513 static tree ix86_canonical_va_list_type (tree);
2514 static void predict_jump (int);
2515 static unsigned int split_stack_prologue_scratch_regno (void);
2516 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2517
2518 enum ix86_function_specific_strings
2519 {
2520 IX86_FUNCTION_SPECIFIC_ARCH,
2521 IX86_FUNCTION_SPECIFIC_TUNE,
2522 IX86_FUNCTION_SPECIFIC_MAX
2523 };
2524
2525 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2526 const char *, enum fpmath_unit, bool);
2527 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2528 static void ix86_function_specific_save (struct cl_target_option *);
2529 static void ix86_function_specific_restore (struct cl_target_option *);
2530 static void ix86_function_specific_print (FILE *, int,
2531 struct cl_target_option *);
2532 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2533 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2534 struct gcc_options *);
2535 static bool ix86_can_inline_p (tree, tree);
2536 static void ix86_set_current_function (tree);
2537 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2538
2539 static enum calling_abi ix86_function_abi (const_tree);
2540
2541 \f
2542 #ifndef SUBTARGET32_DEFAULT_CPU
2543 #define SUBTARGET32_DEFAULT_CPU "i386"
2544 #endif
2545
2546 /* The svr4 ABI for the i386 says that records and unions are returned
2547 in memory. */
2548 #ifndef DEFAULT_PCC_STRUCT_RETURN
2549 #define DEFAULT_PCC_STRUCT_RETURN 1
2550 #endif
2551
2552 /* Whether -mtune= or -march= were specified. */
2553 static int ix86_tune_defaulted;
2554 static int ix86_arch_specified;
2555
2556 /* Vectorization library interface and handlers. */
2557 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2558
2559 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2560 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2561
2562 /* Processor target table, indexed by processor number */
2563 struct ptt
2564 {
2565 const struct processor_costs *cost; /* Processor costs */
2566 const int align_loop; /* Default alignments. */
2567 const int align_loop_max_skip;
2568 const int align_jump;
2569 const int align_jump_max_skip;
2570 const int align_func;
2571 };
2572
2573 static const struct ptt processor_target_table[PROCESSOR_max] =
2574 {
2575 {&i386_cost, 4, 3, 4, 3, 4},
2576 {&i486_cost, 16, 15, 16, 15, 16},
2577 {&pentium_cost, 16, 7, 16, 7, 16},
2578 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2579 {&geode_cost, 0, 0, 0, 0, 0},
2580 {&k6_cost, 32, 7, 32, 7, 32},
2581 {&athlon_cost, 16, 7, 16, 7, 16},
2582 {&pentium4_cost, 0, 0, 0, 0, 0},
2583 {&k8_cost, 16, 7, 16, 7, 16},
2584 {&nocona_cost, 0, 0, 0, 0, 0},
2585 /* Core 2 32-bit. */
2586 {&generic32_cost, 16, 10, 16, 10, 16},
2587 /* Core 2 64-bit. */
2588 {&generic64_cost, 16, 10, 16, 10, 16},
2589 /* Core i7 32-bit. */
2590 {&generic32_cost, 16, 10, 16, 10, 16},
2591 /* Core i7 64-bit. */
2592 {&generic64_cost, 16, 10, 16, 10, 16},
2593 {&generic32_cost, 16, 7, 16, 7, 16},
2594 {&generic64_cost, 16, 10, 16, 10, 16},
2595 {&amdfam10_cost, 32, 24, 32, 7, 32},
2596 {&bdver1_cost, 32, 24, 32, 7, 32},
2597 {&bdver2_cost, 32, 24, 32, 7, 32},
2598 {&btver1_cost, 32, 24, 32, 7, 32},
2599 {&atom_cost, 16, 7, 16, 7, 16}
2600 };
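/* A sketch of how this table is typically consulted (based on the option
   handling later in this file, not quoted verbatim): the per-processor
   alignment defaults only apply when the user did not set them, roughly

     if (align_loops == 0)
       {
         align_loops = processor_target_table[ix86_tune].align_loop;
         align_loops_max_skip
           = processor_target_table[ix86_tune].align_loop_max_skip;
       }

   with the same pattern for align_jumps and align_functions.  */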
2601
2602 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2603 {
2604 "generic",
2605 "i386",
2606 "i486",
2607 "pentium",
2608 "pentium-mmx",
2609 "pentiumpro",
2610 "pentium2",
2611 "pentium3",
2612 "pentium4",
2613 "pentium-m",
2614 "prescott",
2615 "nocona",
2616 "core2",
2617 "corei7",
2618 "atom",
2619 "geode",
2620 "k6",
2621 "k6-2",
2622 "k6-3",
2623 "athlon",
2624 "athlon-4",
2625 "k8",
2626 "amdfam10",
2627 "bdver1",
2628 "bdver2",
2629 "btver1"
2630 };
2631 \f
2632 /* Return true if a red-zone is in use. */
2633
2634 static inline bool
2635 ix86_using_red_zone (void)
2636 {
2637 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2638 }
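/* Background note (general x86-64 ABI knowledge, not from the original
   sources): the red zone is the 128-byte area below the stack pointer that
   the SysV x86-64 ABI guarantees will not be clobbered by signal or
   interrupt handlers, so leaf functions may use it without adjusting %rsp.
   The Microsoft x64 ABI provides no such area, hence the
   TARGET_64BIT_MS_ABI check above.  */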
2639 \f
2640 /* Return a string that documents the current -m options. The caller is
2641 responsible for freeing the string. */
2642
2643 static char *
2644 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2645 const char *tune, enum fpmath_unit fpmath,
2646 bool add_nl_p)
2647 {
2648 struct ix86_target_opts
2649 {
2650 const char *option; /* option string */
2651 HOST_WIDE_INT mask; /* isa mask options */
2652 };
2653
2654 /* This table is ordered so that options like -msse4.2 that imply
2655 preceding options are matched first. */
2656 static struct ix86_target_opts isa_opts[] =
2657 {
2658 { "-m64", OPTION_MASK_ISA_64BIT },
2659 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2660 { "-mfma", OPTION_MASK_ISA_FMA },
2661 { "-mxop", OPTION_MASK_ISA_XOP },
2662 { "-mlwp", OPTION_MASK_ISA_LWP },
2663 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2664 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2665 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2666 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2667 { "-msse3", OPTION_MASK_ISA_SSE3 },
2668 { "-msse2", OPTION_MASK_ISA_SSE2 },
2669 { "-msse", OPTION_MASK_ISA_SSE },
2670 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2671 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2672 { "-mmmx", OPTION_MASK_ISA_MMX },
2673 { "-mabm", OPTION_MASK_ISA_ABM },
2674 { "-mbmi", OPTION_MASK_ISA_BMI },
2675 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2676 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2677 { "-mtbm", OPTION_MASK_ISA_TBM },
2678 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2679 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2680 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2681 { "-maes", OPTION_MASK_ISA_AES },
2682 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2683 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2684 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2685 { "-mf16c", OPTION_MASK_ISA_F16C },
2686 };
2687
2688 /* Flag options. */
2689 static struct ix86_target_opts flag_opts[] =
2690 {
2691 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2692 { "-m80387", MASK_80387 },
2693 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2694 { "-malign-double", MASK_ALIGN_DOUBLE },
2695 { "-mcld", MASK_CLD },
2696 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2697 { "-mieee-fp", MASK_IEEE_FP },
2698 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2699 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2700 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2701 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2702 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2703 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2704 { "-mno-red-zone", MASK_NO_RED_ZONE },
2705 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2706 { "-mrecip", MASK_RECIP },
2707 { "-mrtd", MASK_RTD },
2708 { "-msseregparm", MASK_SSEREGPARM },
2709 { "-mstack-arg-probe", MASK_STACK_PROBE },
2710 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2711 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2712 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2713 { "-mvzeroupper", MASK_VZEROUPPER },
2714 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2715 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2716 { "-mprefer-avx128", MASK_PREFER_AVX128},
2717 };
2718
2719 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2720
2721 char isa_other[40];
2722 char target_other[40];
2723 unsigned num = 0;
2724 unsigned i, j;
2725 char *ret;
2726 char *ptr;
2727 size_t len;
2728 size_t line_len;
2729 size_t sep_len;
2730
2731 memset (opts, '\0', sizeof (opts));
2732
2733 /* Add -march= option. */
2734 if (arch)
2735 {
2736 opts[num][0] = "-march=";
2737 opts[num++][1] = arch;
2738 }
2739
2740 /* Add -mtune= option. */
2741 if (tune)
2742 {
2743 opts[num][0] = "-mtune=";
2744 opts[num++][1] = tune;
2745 }
2746
2747 /* Pick out the options in isa options. */
2748 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2749 {
2750 if ((isa & isa_opts[i].mask) != 0)
2751 {
2752 opts[num++][0] = isa_opts[i].option;
2753 isa &= ~ isa_opts[i].mask;
2754 }
2755 }
2756
2757 if (isa && add_nl_p)
2758 {
2759 opts[num++][0] = isa_other;
2760 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2761 isa);
2762 }
2763
2764 /* Add flag options. */
2765 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2766 {
2767 if ((flags & flag_opts[i].mask) != 0)
2768 {
2769 opts[num++][0] = flag_opts[i].option;
2770 flags &= ~ flag_opts[i].mask;
2771 }
2772 }
2773
2774 if (flags && add_nl_p)
2775 {
2776 opts[num++][0] = target_other;
2777 sprintf (target_other, "(other flags: %#x)", flags);
2778 }
2779
2780 /* Add -fpmath= option. */
2781 if (fpmath)
2782 {
2783 opts[num][0] = "-mfpmath=";
2784 switch ((int) fpmath)
2785 {
2786 case FPMATH_387:
2787 opts[num++][1] = "387";
2788 break;
2789
2790 case FPMATH_SSE:
2791 opts[num++][1] = "sse";
2792 break;
2793
2794 case FPMATH_387 | FPMATH_SSE:
2795 opts[num++][1] = "sse+387";
2796 break;
2797
2798 default:
2799 gcc_unreachable ();
2800 }
2801 }
2802
2803 /* Any options? */
2804 if (num == 0)
2805 return NULL;
2806
2807 gcc_assert (num < ARRAY_SIZE (opts));
2808
2809 /* Size the string. */
2810 len = 0;
2811 sep_len = (add_nl_p) ? 3 : 1;
2812 for (i = 0; i < num; i++)
2813 {
2814 len += sep_len;
2815 for (j = 0; j < 2; j++)
2816 if (opts[i][j])
2817 len += strlen (opts[i][j]);
2818 }
2819
2820 /* Build the string. */
2821 ret = ptr = (char *) xmalloc (len);
2822 line_len = 0;
2823
2824 for (i = 0; i < num; i++)
2825 {
2826 size_t len2[2];
2827
2828 for (j = 0; j < 2; j++)
2829 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2830
2831 if (i != 0)
2832 {
2833 *ptr++ = ' ';
2834 line_len++;
2835
2836 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2837 {
2838 *ptr++ = '\\';
2839 *ptr++ = '\n';
2840 line_len = 0;
2841 }
2842 }
2843
2844 for (j = 0; j < 2; j++)
2845 if (opts[i][j])
2846 {
2847 memcpy (ptr, opts[i][j], len2[j]);
2848 ptr += len2[j];
2849 line_len += len2[j];
2850 }
2851 }
2852
2853 *ptr = '\0';
2854 gcc_assert (ret + len >= ptr);
2855
2856 return ret;
2857 }
2858
2859 /* Return true if profiling code should be emitted before the
2860 prologue, and false otherwise. On x86 this is the case only
2861 when -mfentry ("hotfix"-style profiling) is in use. */
2862 static bool
2863 ix86_profile_before_prologue (void)
2864 {
2865 return flag_fentry != 0;
2866 }
2867
2868 /* Function that is callable from the debugger to print the current
2869 options. */
2870 void
2871 ix86_debug_options (void)
2872 {
2873 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2874 ix86_arch_string, ix86_tune_string,
2875 ix86_fpmath, true);
2876
2877 if (opts)
2878 {
2879 fprintf (stderr, "%s\n\n", opts);
2880 free (opts);
2881 }
2882 else
2883 fputs ("<no options>\n\n", stderr);
2884
2885 return;
2886 }
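/* A usage note (an illustration, not part of the original file): because
   ix86_debug_options is kept callable from the debugger, one can do

     (gdb) call ix86_debug_options ()

   while debugging cc1 to dump the active -march/-mtune, ISA and -mfpmath
   settings in their command-line spelling.  */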
2887 \f
2888 /* Override various settings based on options. If MAIN_ARGS_P, the
2889 options are from the command line, otherwise they are from
2890 attributes. */
2891
2892 static void
2893 ix86_option_override_internal (bool main_args_p)
2894 {
2895 int i;
2896 unsigned int ix86_arch_mask, ix86_tune_mask;
2897 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2898 const char *prefix;
2899 const char *suffix;
2900 const char *sw;
2901
2902 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2903 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2904 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2905 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2906 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2907 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2908 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2909 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2910 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2911 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2912 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2913 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2914 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2915 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2916 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2917 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2918 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2919 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2920 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2921 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2922 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2923 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2924 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2925 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2926 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2927 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2928 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2929 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2930 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2931 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2932 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2933 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2934 /* If this reaches 64, the flags field in struct pta below needs to be widened. */
2935
2936 static struct pta
2937 {
2938 const char *const name; /* processor name or nickname. */
2939 const enum processor_type processor;
2940 const enum attr_cpu schedule;
2941 const unsigned HOST_WIDE_INT flags;
2942 }
2943 const processor_alias_table[] =
2944 {
2945 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2946 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2947 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2948 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2949 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2950 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2951 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2952 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2953 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2954 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2955 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2956 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2957 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2958 PTA_MMX | PTA_SSE},
2959 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2960 PTA_MMX | PTA_SSE},
2961 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2962 PTA_MMX | PTA_SSE | PTA_SSE2},
2963 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2964 PTA_MMX | PTA_SSE | PTA_SSE2},
2965 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2966 PTA_MMX | PTA_SSE | PTA_SSE2},
2967 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2968 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2969 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2970 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2971 | PTA_CX16 | PTA_NO_SAHF},
2972 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2973 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2974 | PTA_SSSE3 | PTA_CX16},
2975 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2976 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2977 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2978 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2979 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2980 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2981 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2982 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2983 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2984 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2985 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2986 | PTA_RDRND | PTA_F16C},
2987 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
2988 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2989 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
2990 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2991 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
2992 | PTA_FMA | PTA_MOVBE},
2993 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2994 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2995 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2996 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2997 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2998 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2999 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3000 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3001 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3002 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3003 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3004 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3005 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3006 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3007 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3008 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3009 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3010 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3011 {"x86-64", PROCESSOR_K8, CPU_K8,
3012 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3013 {"k8", PROCESSOR_K8, CPU_K8,
3014 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3015 | PTA_SSE2 | PTA_NO_SAHF},
3016 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3017 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3018 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3019 {"opteron", PROCESSOR_K8, CPU_K8,
3020 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3021 | PTA_SSE2 | PTA_NO_SAHF},
3022 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3023 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3024 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3025 {"athlon64", PROCESSOR_K8, CPU_K8,
3026 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3027 | PTA_SSE2 | PTA_NO_SAHF},
3028 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3029 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3030 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3031 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3032 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3033 | PTA_SSE2 | PTA_NO_SAHF},
3034 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3035 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3036 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3037 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3038 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3039 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3040 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3041 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3042 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3043 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3044 | PTA_XOP | PTA_LWP},
3045 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3046 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3047 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3048 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3049 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3050 | PTA_FMA},
3051 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3052 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3053 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3054 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3055 0 /* flags are only used for -march switch. */ },
3056 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3057 PTA_64BIT /* flags are only used for -march switch. */ },
3058 };
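/* A sketch of how this alias table is used (assumed from the option
   handling further down in this function, not quoted verbatim): the
   -march= / -mtune= strings are looked up by name and the matching entry
   supplies the processor, the scheduling model and the default ISA bits,
   roughly

     for (i = 0; i < pta_size; i++)
       if (!strcmp (ix86_arch_string, processor_alias_table[i].name))
         {
           ix86_schedule = processor_alias_table[i].schedule;
           ix86_arch = processor_alias_table[i].processor;
           if (processor_alias_table[i].flags & PTA_MMX
               && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
             ix86_isa_flags |= OPTION_MASK_ISA_MMX;
           ... and similarly for the other PTA_* bits ...
           break;
         }
   */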
3059
3060 /* -mrecip options. */
3061 static struct
3062 {
3063 const char *string; /* option name */
3064 unsigned int mask; /* mask bits to set */
3065 }
3066 const recip_options[] =
3067 {
3068 { "all", RECIP_MASK_ALL },
3069 { "none", RECIP_MASK_NONE },
3070 { "div", RECIP_MASK_DIV },
3071 { "sqrt", RECIP_MASK_SQRT },
3072 { "vec-div", RECIP_MASK_VEC_DIV },
3073 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3074 };
3075
3076 int const pta_size = ARRAY_SIZE (processor_alias_table);
3077
3078 /* Set up prefix/suffix so the error messages refer to either the command
3079 line argument, or the attribute(target). */
3080 if (main_args_p)
3081 {
3082 prefix = "-m";
3083 suffix = "";
3084 sw = "switch";
3085 }
3086 else
3087 {
3088 prefix = "option(\"";
3089 suffix = "\")";
3090 sw = "attribute";
3091 }
3092
3093 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3094 SUBTARGET_OVERRIDE_OPTIONS;
3095 #endif
3096
3097 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3098 SUBSUBTARGET_OVERRIDE_OPTIONS;
3099 #endif
3100
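  /* x32 uses the 64-bit instruction set, so make sure the 64-bit ISA flag
     is turned on.  */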
3101 if (TARGET_X32)
3102 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3103
3104 /* -fPIC is the default for 64-bit Darwin (Mach-O). */
3105 if (TARGET_MACHO && TARGET_64BIT)
3106 flag_pic = 2;
3107
3108 /* Need to check -mtune=generic first. */
3109 if (ix86_tune_string)
3110 {
3111 if (!strcmp (ix86_tune_string, "generic")
3112 || !strcmp (ix86_tune_string, "i686")
3113 /* As special support for cross compilers we read -mtune=native
3114 as -mtune=generic. With native compilers we won't see the
3115 -mtune=native, as it was changed by the driver. */
3116 || !strcmp (ix86_tune_string, "native"))
3117 {
3118 if (TARGET_64BIT)
3119 ix86_tune_string = "generic64";
3120 else
3121 ix86_tune_string = "generic32";
3122 }
3123 /* If this call is for setting the option attribute, allow the
3124 generic32/generic64 that was previously set. */
3125 else if (!main_args_p
3126 && (!strcmp (ix86_tune_string, "generic32")
3127 || !strcmp (ix86_tune_string, "generic64")))
3128 ;
3129 else if (!strncmp (ix86_tune_string, "generic", 7))
3130 error ("bad value (%s) for %stune=%s %s",
3131 ix86_tune_string, prefix, suffix, sw);
3132 else if (!strcmp (ix86_tune_string, "x86-64"))
3133 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3134 "%stune=k8%s or %stune=generic%s instead as appropriate",
3135 prefix, suffix, prefix, suffix, prefix, suffix);
3136 }
3137 else
3138 {
3139 if (ix86_arch_string)
3140 ix86_tune_string = ix86_arch_string;
3141 if (!ix86_tune_string)
3142 {
3143 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3144 ix86_tune_defaulted = 1;
3145 }
3146
3147 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3148 need to use a sensible tune option. */
3149 if (!strcmp (ix86_tune_string, "generic")
3150 || !strcmp (ix86_tune_string, "x86-64")
3151 || !strcmp (ix86_tune_string, "i686"))
3152 {
3153 if (TARGET_64BIT)
3154 ix86_tune_string = "generic64";
3155 else
3156 ix86_tune_string = "generic32";
3157 }
3158 }
3159
3160 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3161 {
3162 /* rep; movq isn't available in 32-bit code. */
3163 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3164 ix86_stringop_alg = no_stringop;
3165 }
3166
3167 if (!ix86_arch_string)
3168 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3169 else
3170 ix86_arch_specified = 1;
3171
3172 if (!global_options_set.x_ix86_abi)
3173 ix86_abi = DEFAULT_ABI;
3174
3175 if (global_options_set.x_ix86_cmodel)
3176 {
3177 switch (ix86_cmodel)
3178 {
3179 case CM_SMALL:
3180 case CM_SMALL_PIC:
3181 if (flag_pic)
3182 ix86_cmodel = CM_SMALL_PIC;
3183 if (!TARGET_64BIT)
3184 error ("code model %qs not supported in the %s bit mode",
3185 "small", "32");
3186 break;
3187
3188 case CM_MEDIUM:
3189 case CM_MEDIUM_PIC:
3190 if (flag_pic)
3191 ix86_cmodel = CM_MEDIUM_PIC;
3192 if (!TARGET_64BIT)
3193 error ("code model %qs not supported in the %s bit mode",
3194 "medium", "32");
3195 else if (TARGET_X32)
3196 error ("code model %qs not supported in x32 mode",
3197 "medium");
3198 break;
3199
3200 case CM_LARGE:
3201 case CM_LARGE_PIC:
3202 if (flag_pic)
3203 ix86_cmodel = CM_LARGE_PIC;
3204 if (!TARGET_64BIT)
3205 error ("code model %qs not supported in the %s bit mode",
3206 "large", "32");
3207 else if (TARGET_X32)
3208 error ("code model %qs not supported in x32 mode",
3209 "medium");
3210 break;
3211
3212 case CM_32:
3213 if (flag_pic)
3214 error ("code model %s does not support PIC mode", "32");
3215 if (TARGET_64BIT)
3216 error ("code model %qs not supported in the %s bit mode",
3217 "32", "64");
3218 break;
3219
3220 case CM_KERNEL:
3221 if (flag_pic)
3222 {
3223 error ("code model %s does not support PIC mode", "kernel");
3224 ix86_cmodel = CM_32;
3225 }
3226 if (!TARGET_64BIT)
3227 error ("code model %qs not supported in the %s bit mode",
3228 "kernel", "32");
3229 break;
3230
3231 default:
3232 gcc_unreachable ();
3233 }
3234 }
3235 else
3236 {
3237 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3238 use of rip-relative addressing. This eliminates fixups that
3239 would otherwise be needed if this object is to be placed in a
3240 DLL, and is essentially just as efficient as direct addressing. */
3241 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3242 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3243 else if (TARGET_64BIT)
3244 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3245 else
3246 ix86_cmodel = CM_32;
3247 }
3248 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3249 {
3250 error ("-masm=intel not supported in this configuration");
3251 ix86_asm_dialect = ASM_ATT;
3252 }
3253 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3254 sorry ("%i-bit mode not compiled in",
3255 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3256
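  /* Look up -march= in the processor alias table and turn on each ISA flag
     the selected CPU implies, unless that ISA was explicitly enabled or
     disabled on the command line.  */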
3257 for (i = 0; i < pta_size; i++)
3258 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3259 {
3260 ix86_schedule = processor_alias_table[i].schedule;
3261 ix86_arch = processor_alias_table[i].processor;
3262 /* Default cpu tuning to the architecture. */
3263 ix86_tune = ix86_arch;
3264
3265 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3266 error ("CPU you selected does not support x86-64 "
3267 "instruction set");
3268
3269 if (processor_alias_table[i].flags & PTA_MMX
3270 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3271 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3272 if (processor_alias_table[i].flags & PTA_3DNOW
3273 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3274 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3275 if (processor_alias_table[i].flags & PTA_3DNOW_A
3276 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3277 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3278 if (processor_alias_table[i].flags & PTA_SSE
3279 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3280 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3281 if (processor_alias_table[i].flags & PTA_SSE2
3282 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3283 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3284 if (processor_alias_table[i].flags & PTA_SSE3
3285 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3286 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3287 if (processor_alias_table[i].flags & PTA_SSSE3
3288 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3289 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3290 if (processor_alias_table[i].flags & PTA_SSE4_1
3291 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3292 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3293 if (processor_alias_table[i].flags & PTA_SSE4_2
3294 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3295 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3296 if (processor_alias_table[i].flags & PTA_AVX
3297 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3298 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3299 if (processor_alias_table[i].flags & PTA_AVX2
3300 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3301 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3302 if (processor_alias_table[i].flags & PTA_FMA
3303 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3304 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3305 if (processor_alias_table[i].flags & PTA_SSE4A
3306 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3307 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3308 if (processor_alias_table[i].flags & PTA_FMA4
3309 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3310 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3311 if (processor_alias_table[i].flags & PTA_XOP
3312 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3313 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3314 if (processor_alias_table[i].flags & PTA_LWP
3315 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3316 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3317 if (processor_alias_table[i].flags & PTA_ABM
3318 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3319 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3320 if (processor_alias_table[i].flags & PTA_BMI
3321 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3322 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3323 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3324 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3325 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3326 if (processor_alias_table[i].flags & PTA_TBM
3327 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3328 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3329 if (processor_alias_table[i].flags & PTA_BMI2
3330 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3331 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3332 if (processor_alias_table[i].flags & PTA_CX16
3333 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3334 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3335 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3336 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3337 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3338 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3339 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3340 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3341 if (processor_alias_table[i].flags & PTA_MOVBE
3342 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3343 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3344 if (processor_alias_table[i].flags & PTA_AES
3345 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3346 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3347 if (processor_alias_table[i].flags & PTA_PCLMUL
3348 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3349 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3350 if (processor_alias_table[i].flags & PTA_FSGSBASE
3351 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3352 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3353 if (processor_alias_table[i].flags & PTA_RDRND
3354 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3355 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3356 if (processor_alias_table[i].flags & PTA_F16C
3357 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3358 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3359 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3360 x86_prefetch_sse = true;
3361
3362 break;
3363 }
3364
3365 if (!strcmp (ix86_arch_string, "generic"))
3366 error ("generic CPU can be used only for %stune=%s %s",
3367 prefix, suffix, sw);
3368 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3369 error ("bad value (%s) for %sarch=%s %s",
3370 ix86_arch_string, prefix, suffix, sw);
3371
3372 ix86_arch_mask = 1u << ix86_arch;
3373 for (i = 0; i < X86_ARCH_LAST; ++i)
3374 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3375
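  /* Likewise look up -mtune= in the alias table; unlike -march=, it does not
     enable any ISA flags, and 64-bit-only tunings are remapped to their
     32-bit counterparts when not generating 64-bit code.  */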
3376 for (i = 0; i < pta_size; i++)
3377 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3378 {
3379 ix86_schedule = processor_alias_table[i].schedule;
3380 ix86_tune = processor_alias_table[i].processor;
3381 if (TARGET_64BIT)
3382 {
3383 if (!(processor_alias_table[i].flags & PTA_64BIT))
3384 {
3385 if (ix86_tune_defaulted)
3386 {
3387 ix86_tune_string = "x86-64";
3388 for (i = 0; i < pta_size; i++)
3389 if (! strcmp (ix86_tune_string,
3390 processor_alias_table[i].name))
3391 break;
3392 ix86_schedule = processor_alias_table[i].schedule;
3393 ix86_tune = processor_alias_table[i].processor;
3394 }
3395 else
3396 error ("CPU you selected does not support x86-64 "
3397 "instruction set");
3398 }
3399 }
3400 else
3401 {
3402 /* Adjust tuning when compiling for 32-bit ABI. */
3403 switch (ix86_tune)
3404 {
3405 case PROCESSOR_GENERIC64:
3406 ix86_tune = PROCESSOR_GENERIC32;
3407 ix86_schedule = CPU_PENTIUMPRO;
3408 break;
3409
3410 case PROCESSOR_CORE2_64:
3411 ix86_tune = PROCESSOR_CORE2_32;
3412 break;
3413
3414 case PROCESSOR_COREI7_64:
3415 ix86_tune = PROCESSOR_COREI7_32;
3416 break;
3417
3418 default:
3419 break;
3420 }
3421 }
3422 /* Intel CPUs have always interpreted SSE prefetch instructions as
3423 NOPs; so, we can enable SSE prefetch instructions even when
3424 -mtune (rather than -march) points us to a processor that has them.
3425 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3426 higher processors. */
3427 if (TARGET_CMOVE
3428 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3429 x86_prefetch_sse = true;
3430 break;
3431 }
3432
3433 if (ix86_tune_specified && i == pta_size)
3434 error ("bad value (%s) for %stune=%s %s",
3435 ix86_tune_string, prefix, suffix, sw);
3436
3437 ix86_tune_mask = 1u << ix86_tune;
3438 for (i = 0; i < X86_TUNE_LAST; ++i)
3439 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3440
3441 #ifndef USE_IX86_FRAME_POINTER
3442 #define USE_IX86_FRAME_POINTER 0
3443 #endif
3444
3445 #ifndef USE_X86_64_FRAME_POINTER
3446 #define USE_X86_64_FRAME_POINTER 0
3447 #endif
3448
3449 /* Set the default values for switches whose default depends on TARGET_64BIT
3450 in case they weren't overwritten by command line options. */
3451 if (TARGET_64BIT)
3452 {
3453 if (optimize > 1 && !global_options_set.x_flag_zee)
3454 flag_zee = 1;
3455 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3456 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3457 if (flag_asynchronous_unwind_tables == 2)
3458 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3459 if (flag_pcc_struct_return == 2)
3460 flag_pcc_struct_return = 0;
3461 }
3462 else
3463 {
3464 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3465 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3466 if (flag_asynchronous_unwind_tables == 2)
3467 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3468 if (flag_pcc_struct_return == 2)
3469 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3470 }
3471
3472 if (optimize_size)
3473 ix86_cost = &ix86_size_cost;
3474 else
3475 ix86_cost = processor_target_table[ix86_tune].cost;
3476
3477 /* Arrange to set up i386_stack_locals for all functions. */
3478 init_machine_status = ix86_init_machine_status;
3479
3480 /* Validate -mregparm= value. */
3481 if (global_options_set.x_ix86_regparm)
3482 {
3483 if (TARGET_64BIT)
3484 warning (0, "-mregparm is ignored in 64-bit mode");
3485 if (ix86_regparm > REGPARM_MAX)
3486 {
3487 error ("-mregparm=%d is not between 0 and %d",
3488 ix86_regparm, REGPARM_MAX);
3489 ix86_regparm = 0;
3490 }
3491 }
3492 if (TARGET_64BIT)
3493 ix86_regparm = REGPARM_MAX;
3494
3495 /* Default align_* from the processor table. */
3496 if (align_loops == 0)
3497 {
3498 align_loops = processor_target_table[ix86_tune].align_loop;
3499 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3500 }
3501 if (align_jumps == 0)
3502 {
3503 align_jumps = processor_target_table[ix86_tune].align_jump;
3504 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3505 }
3506 if (align_functions == 0)
3507 {
3508 align_functions = processor_target_table[ix86_tune].align_func;
3509 }
3510
3511 /* Provide default for -mbranch-cost= value. */
3512 if (!global_options_set.x_ix86_branch_cost)
3513 ix86_branch_cost = ix86_cost->branch_cost;
3514
3515 if (TARGET_64BIT)
3516 {
3517 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3518
3519 /* Enable by default the SSE and MMX builtins. Do allow the user to
3520 explicitly disable any of these. In particular, disabling SSE and
3521 MMX for kernel code is extremely useful. */
3522 if (!ix86_arch_specified)
3523 ix86_isa_flags
3524 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3525 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3526
3527 if (TARGET_RTD)
3528 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3529 }
3530 else
3531 {
3532 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3533
3534 if (!ix86_arch_specified)
3535 ix86_isa_flags
3536 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3537
3538 /* The i386 ABI does not specify a red zone. It still makes sense to use
3539 one when the programmer takes care to keep the stack from being destroyed. */
3540 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3541 target_flags |= MASK_NO_RED_ZONE;
3542 }
3543
3544 /* Keep nonleaf frame pointers. */
3545 if (flag_omit_frame_pointer)
3546 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3547 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3548 flag_omit_frame_pointer = 1;
3549
3550 /* If we're doing fast math, we don't care about comparison order
3551 wrt NaNs. This lets us use a shorter comparison sequence. */
3552 if (flag_finite_math_only)
3553 target_flags &= ~MASK_IEEE_FP;
3554
3555 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3556 since the insns won't need emulation. */
3557 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3558 target_flags &= ~MASK_NO_FANCY_MATH_387;
3559
3560 /* Likewise, if the target doesn't have a 387, or we've specified
3561 software floating point, don't use 387 inline intrinsics. */
3562 if (!TARGET_80387)
3563 target_flags |= MASK_NO_FANCY_MATH_387;
3564
3565 /* Turn on MMX builtins for -msse. */
3566 if (TARGET_SSE)
3567 {
3568 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3569 x86_prefetch_sse = true;
3570 }
3571
3572 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3573 if (TARGET_SSE4_2 || TARGET_ABM)
3574 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3575
3576 /* Turn on lzcnt instruction for -mabm. */
3577 if (TARGET_ABM)
3578 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3579
3580 /* Validate -mpreferred-stack-boundary= value or default it to
3581 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3582 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3583 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3584 {
3585 int min = (TARGET_64BIT ? 4 : 2);
3586 int max = (TARGET_SEH ? 4 : 12);
3587
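      /* The option argument is the log2 of the boundary in bytes, so 4 means
         a 16-byte preferred stack boundary.  */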
3588 if (ix86_preferred_stack_boundary_arg < min
3589 || ix86_preferred_stack_boundary_arg > max)
3590 {
3591 if (min == max)
3592 error ("-mpreferred-stack-boundary is not supported "
3593 "for this target");
3594 else
3595 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3596 ix86_preferred_stack_boundary_arg, min, max);
3597 }
3598 else
3599 ix86_preferred_stack_boundary
3600 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3601 }
3602
3603 /* Set the default value for -mstackrealign. */
3604 if (ix86_force_align_arg_pointer == -1)
3605 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3606
3607 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3608
3609 /* Validate -mincoming-stack-boundary= value or default it to
3610 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3611 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3612 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3613 {
3614 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3615 || ix86_incoming_stack_boundary_arg > 12)
3616 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3617 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3618 else
3619 {
3620 ix86_user_incoming_stack_boundary
3621 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3622 ix86_incoming_stack_boundary
3623 = ix86_user_incoming_stack_boundary;
3624 }
3625 }
3626
3627 /* Accept -msseregparm only if at least SSE support is enabled. */
3628 if (TARGET_SSEREGPARM
3629 && ! TARGET_SSE)
3630 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3631
3632 if (global_options_set.x_ix86_fpmath)
3633 {
3634 if (ix86_fpmath & FPMATH_SSE)
3635 {
3636 if (!TARGET_SSE)
3637 {
3638 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3639 ix86_fpmath = FPMATH_387;
3640 }
3641 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3642 {
3643 warning (0, "387 instruction set disabled, using SSE arithmetics");
3644 ix86_fpmath = FPMATH_SSE;
3645 }
3646 }
3647 }
3648 else
3649 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3650
3651 /* If the i387 is disabled, then do not return values in it. */
3652 if (!TARGET_80387)
3653 target_flags &= ~MASK_FLOAT_RETURNS;
3654
3655 /* Use external vectorized library in vectorizing intrinsics. */
3656 if (global_options_set.x_ix86_veclibabi_type)
3657 switch (ix86_veclibabi_type)
3658 {
3659 case ix86_veclibabi_type_svml:
3660 ix86_veclib_handler = ix86_veclibabi_svml;
3661 break;
3662
3663 case ix86_veclibabi_type_acml:
3664 ix86_veclib_handler = ix86_veclibabi_acml;
3665 break;
3666
3667 default:
3668 gcc_unreachable ();
3669 }
3670
3671 if ((!USE_IX86_FRAME_POINTER
3672 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3673 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3674 && !optimize_size)
3675 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3676
3677 /* ??? Unwind info is not correct around the CFG unless either a frame
3678 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3679 unwind info generation to be aware of the CFG and propagating states
3680 around edges. */
3681 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3682 || flag_exceptions || flag_non_call_exceptions)
3683 && flag_omit_frame_pointer
3684 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3685 {
3686 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3687 warning (0, "unwind tables currently require either a frame pointer "
3688 "or %saccumulate-outgoing-args%s for correctness",
3689 prefix, suffix);
3690 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3691 }
3692
3693 /* If stack probes are required, the space used for large function
3694 arguments on the stack must also be probed, so enable
3695 -maccumulate-outgoing-args so this happens in the prologue. */
3696 if (TARGET_STACK_PROBE
3697 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3698 {
3699 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3700 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3701 "for correctness", prefix, suffix);
3702 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3703 }
3704
3705 /* For sane SSE instruction set generation we need the fcomi instruction.
3706 It is safe to enable all CMOVE instructions. Also, the RDRAND intrinsic
3707 expands to a sequence that includes a conditional move. */
3708 if (TARGET_SSE || TARGET_RDRND)
3709 TARGET_CMOVE = 1;
3710
3711 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3712 {
3713 char *p;
3714 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3715 p = strchr (internal_label_prefix, 'X');
3716 internal_label_prefix_len = p - internal_label_prefix;
3717 *p = '\0';
3718 }
3719
3720 /* When a scheduling description is not available, disable the scheduler
3721 pass so it won't slow down compilation and make x87 code slower. */
3722 if (!TARGET_SCHEDULE)
3723 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3724
3725 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3726 ix86_cost->simultaneous_prefetches,
3727 global_options.x_param_values,
3728 global_options_set.x_param_values);
3729 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3730 global_options.x_param_values,
3731 global_options_set.x_param_values);
3732 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3733 global_options.x_param_values,
3734 global_options_set.x_param_values);
3735 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3736 global_options.x_param_values,
3737 global_options_set.x_param_values);
3738
3739 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3740 if (flag_prefetch_loop_arrays < 0
3741 && HAVE_prefetch
3742 && optimize >= 3
3743 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3744 flag_prefetch_loop_arrays = 1;
3745
3746 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3747 can be optimized to ap = __builtin_next_arg (0). */
3748 if (!TARGET_64BIT && !flag_split_stack)
3749 targetm.expand_builtin_va_start = NULL;
3750
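  /* Select the SImode or DImode variants of the RTL generator helpers once,
     so later code doesn't have to test TARGET_64BIT each time.  */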
3751 if (TARGET_64BIT)
3752 {
3753 ix86_gen_leave = gen_leave_rex64;
3754 ix86_gen_add3 = gen_adddi3;
3755 ix86_gen_sub3 = gen_subdi3;
3756 ix86_gen_sub3_carry = gen_subdi3_carry;
3757 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3758 ix86_gen_monitor = gen_sse3_monitor64;
3759 ix86_gen_andsp = gen_anddi3;
3760 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3761 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3762 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3763 }
3764 else
3765 {
3766 ix86_gen_leave = gen_leave;
3767 ix86_gen_add3 = gen_addsi3;
3768 ix86_gen_sub3 = gen_subsi3;
3769 ix86_gen_sub3_carry = gen_subsi3_carry;
3770 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3771 ix86_gen_monitor = gen_sse3_monitor;
3772 ix86_gen_andsp = gen_andsi3;
3773 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3774 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3775 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3776 }
3777
3778 #ifdef USE_IX86_CLD
3779 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3780 if (!TARGET_64BIT)
3781 target_flags |= MASK_CLD & ~target_flags_explicit;
3782 #endif
3783
3784 if (!TARGET_64BIT && flag_pic)
3785 {
3786 if (flag_fentry > 0)
3787 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3788 "with -fpic");
3789 flag_fentry = 0;
3790 }
3791 else if (TARGET_SEH)
3792 {
3793 if (flag_fentry == 0)
3794 sorry ("-mno-fentry isn%'t compatible with SEH");
3795 flag_fentry = 1;
3796 }
3797 else if (flag_fentry < 0)
3798 {
3799 #if defined(PROFILE_BEFORE_PROLOGUE)
3800 flag_fentry = 1;
3801 #else
3802 flag_fentry = 0;
3803 #endif
3804 }
3805
3806 if (TARGET_AVX)
3807 {
3808 /* When not optimizing for size, enable the vzeroupper optimization for
3809 TARGET_AVX with -fexpensive-optimizations, and split 32-byte
3810 AVX unaligned loads/stores. */
3811 if (!optimize_size)
3812 {
3813 if (flag_expensive_optimizations
3814 && !(target_flags_explicit & MASK_VZEROUPPER))
3815 target_flags |= MASK_VZEROUPPER;
3816 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3817 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3818 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3819 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3820 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3821 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3822 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3823 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3824 target_flags |= MASK_PREFER_AVX128;
3825 }
3826 }
3827 else
3828 {
3829 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3830 target_flags &= ~MASK_VZEROUPPER;
3831 }
3832
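  /* Parse -mrecip=LIST, a comma-separated list of the option names in
     recip_options above; a leading '!' inverts an entry and "default"
     stands for all of them.  */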
3833 if (ix86_recip_name)
3834 {
3835 char *p = ASTRDUP (ix86_recip_name);
3836 char *q;
3837 unsigned int mask, i;
3838 bool invert;
3839
3840 while ((q = strtok (p, ",")) != NULL)
3841 {
3842 p = NULL;
3843 if (*q == '!')
3844 {
3845 invert = true;
3846 q++;
3847 }
3848 else
3849 invert = false;
3850
3851 if (!strcmp (q, "default"))
3852 mask = RECIP_MASK_ALL;
3853 else
3854 {
3855 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
3856 if (!strcmp (q, recip_options[i].string))
3857 {
3858 mask = recip_options[i].mask;
3859 break;
3860 }
3861
3862 if (i == ARRAY_SIZE (recip_options))
3863 {
3864 error ("unknown option for -mrecip=%s", q);
3865 invert = false;
3866 mask = RECIP_MASK_NONE;
3867 }
3868 }
3869
3870 recip_mask_explicit |= mask;
3871 if (invert)
3872 recip_mask &= ~mask;
3873 else
3874 recip_mask |= mask;
3875 }
3876 }
3877
3878 if (TARGET_RECIP)
3879 recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
3880 else if (target_flags_explicit & MASK_RECIP)
3881 recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
3882
3883 /* Save the initial options in case the user does function specific
3884 options. */
3885 if (main_args_p)
3886 target_option_default_node = target_option_current_node
3887 = build_target_option_node ();
3888 }
3889
3890 /* Return TRUE if VAL is passed in a register with a 256bit AVX mode. */
3891
3892 static bool
3893 function_pass_avx256_p (const_rtx val)
3894 {
3895 if (!val)
3896 return false;
3897
3898 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3899 return true;
3900
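  /* Values passed in multiple registers show up as a PARALLEL of EXPR_LISTs;
     check each piece for OImode or a 256bit AVX mode.  */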
3901 if (GET_CODE (val) == PARALLEL)
3902 {
3903 int i;
3904 rtx r;
3905
3906 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3907 {
3908 r = XVECEXP (val, 0, i);
3909 if (GET_CODE (r) == EXPR_LIST
3910 && XEXP (r, 0)
3911 && REG_P (XEXP (r, 0))
3912 && (GET_MODE (XEXP (r, 0)) == OImode
3913 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3914 return true;
3915 }
3916 }
3917
3918 return false;
3919 }
3920
3921 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3922
3923 static void
3924 ix86_option_override (void)
3925 {
3926 ix86_option_override_internal (true);
3927 }
3928
3929 /* Update register usage after having seen the compiler flags. */
3930
3931 static void
3932 ix86_conditional_register_usage (void)
3933 {
3934 int i;
3935 unsigned int j;
3936
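  /* A value of 2 in the fixed/call-used register tables marks a register
     that is fixed or call-used only in 32-bit mode, 3 only in 64-bit mode.  */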
3937 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3938 {
3939 if (fixed_regs[i] > 1)
3940 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3941 if (call_used_regs[i] > 1)
3942 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3943 }
3944
3945 /* The PIC register, if it exists, is fixed. */
3946 j = PIC_OFFSET_TABLE_REGNUM;
3947 if (j != INVALID_REGNUM)
3948 fixed_regs[j] = call_used_regs[j] = 1;
3949
3950 /* The 64-bit MS_ABI changes the set of call-used registers. */
3951 if (TARGET_64BIT_MS_ABI)
3952 {
3953 call_used_regs[SI_REG] = 0;
3954 call_used_regs[DI_REG] = 0;
3955 call_used_regs[XMM6_REG] = 0;
3956 call_used_regs[XMM7_REG] = 0;
3957 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3958 call_used_regs[i] = 0;
3959 }
3960
3961 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3962 other call-clobbered regs for 64-bit. */
3963 if (TARGET_64BIT)
3964 {
3965 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3966
3967 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3968 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3969 && call_used_regs[i])
3970 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3971 }
3972
3973 /* If MMX is disabled, squash the registers. */
3974 if (! TARGET_MMX)
3975 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3976 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3977 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3978
3979 /* If SSE is disabled, squash the registers. */
3980 if (! TARGET_SSE)
3981 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3982 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3983 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3984
3985 /* If the FPU is disabled, squash the registers. */
3986 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3987 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3988 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3989 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3990
3991 /* If 32-bit, squash the 64-bit registers. */
3992 if (! TARGET_64BIT)
3993 {
3994 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3995 reg_names[i] = "";
3996 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3997 reg_names[i] = "";
3998 }
3999 }
4000
4001 \f
4002 /* Save the current options */
4003
4004 static void
4005 ix86_function_specific_save (struct cl_target_option *ptr)
4006 {
4007 ptr->arch = ix86_arch;
4008 ptr->schedule = ix86_schedule;
4009 ptr->tune = ix86_tune;
4010 ptr->branch_cost = ix86_branch_cost;
4011 ptr->tune_defaulted = ix86_tune_defaulted;
4012 ptr->arch_specified = ix86_arch_specified;
4013 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4014 ptr->ix86_target_flags_explicit = target_flags_explicit;
4015 ptr->x_recip_mask_explicit = recip_mask_explicit;
4016
4017 /* The fields are char but the variables are not; make sure the
4018 values fit in the fields. */
4019 gcc_assert (ptr->arch == ix86_arch);
4020 gcc_assert (ptr->schedule == ix86_schedule);
4021 gcc_assert (ptr->tune == ix86_tune);
4022 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4023 }
4024
4025 /* Restore the current options */
4026
4027 static void
4028 ix86_function_specific_restore (struct cl_target_option *ptr)
4029 {
4030 enum processor_type old_tune = ix86_tune;
4031 enum processor_type old_arch = ix86_arch;
4032 unsigned int ix86_arch_mask, ix86_tune_mask;
4033 int i;
4034
4035 ix86_arch = (enum processor_type) ptr->arch;
4036 ix86_schedule = (enum attr_cpu) ptr->schedule;
4037 ix86_tune = (enum processor_type) ptr->tune;
4038 ix86_branch_cost = ptr->branch_cost;
4039 ix86_tune_defaulted = ptr->tune_defaulted;
4040 ix86_arch_specified = ptr->arch_specified;
4041 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4042 target_flags_explicit = ptr->ix86_target_flags_explicit;
4043 recip_mask_explicit = ptr->x_recip_mask_explicit;
4044
4045 /* Recreate the arch feature tests if the arch changed */
4046 if (old_arch != ix86_arch)
4047 {
4048 ix86_arch_mask = 1u << ix86_arch;
4049 for (i = 0; i < X86_ARCH_LAST; ++i)
4050 ix86_arch_features[i]
4051 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4052 }
4053
4054 /* Recreate the tune optimization tests */
4055 if (old_tune != ix86_tune)
4056 {
4057 ix86_tune_mask = 1u << ix86_tune;
4058 for (i = 0; i < X86_TUNE_LAST; ++i)
4059 ix86_tune_features[i]
4060 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4061 }
4062 }
4063
4064 /* Print the current options */
4065
4066 static void
4067 ix86_function_specific_print (FILE *file, int indent,
4068 struct cl_target_option *ptr)
4069 {
4070 char *target_string
4071 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4072 NULL, NULL, ptr->x_ix86_fpmath, false);
4073
4074 fprintf (file, "%*sarch = %d (%s)\n",
4075 indent, "",
4076 ptr->arch,
4077 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4078 ? cpu_names[ptr->arch]
4079 : "<unknown>"));
4080
4081 fprintf (file, "%*stune = %d (%s)\n",
4082 indent, "",
4083 ptr->tune,
4084 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4085 ? cpu_names[ptr->tune]
4086 : "<unknown>"));
4087
4088 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4089
4090 if (target_string)
4091 {
4092 fprintf (file, "%*s%s\n", indent, "", target_string);
4093 free (target_string);
4094 }
4095 }
4096
4097 \f
4098 /* Inner function to process the attribute((target(...))): take an argument and
4099 set the current options from that argument. If the argument is a list,
4100 recursively process each element. */
4101
4102 static bool
4103 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4104 struct gcc_options *enum_opts_set)
4105 {
4106 char *next_optstr;
4107 bool ret = true;
4108
4109 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4110 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4111 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4112 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4113 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4114
4115 enum ix86_opt_type
4116 {
4117 ix86_opt_unknown,
4118 ix86_opt_yes,
4119 ix86_opt_no,
4120 ix86_opt_str,
4121 ix86_opt_enum,
4122 ix86_opt_isa
4123 };
4124
4125 static const struct
4126 {
4127 const char *string;
4128 size_t len;
4129 enum ix86_opt_type type;
4130 int opt;
4131 int mask;
4132 } attrs[] = {
4133 /* isa options */
4134 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4135 IX86_ATTR_ISA ("abm", OPT_mabm),
4136 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4137 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4138 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4139 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4140 IX86_ATTR_ISA ("aes", OPT_maes),
4141 IX86_ATTR_ISA ("avx", OPT_mavx),
4142 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4143 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4144 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4145 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4146 IX86_ATTR_ISA ("sse", OPT_msse),
4147 IX86_ATTR_ISA ("sse2", OPT_msse2),
4148 IX86_ATTR_ISA ("sse3", OPT_msse3),
4149 IX86_ATTR_ISA ("sse4", OPT_msse4),
4150 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4151 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4152 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4153 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4154 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4155 IX86_ATTR_ISA ("fma", OPT_mfma),
4156 IX86_ATTR_ISA ("xop", OPT_mxop),
4157 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4158 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4159 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4160 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4161
4162 /* enum options */
4163 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4164
4165 /* string options */
4166 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4167 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4168
4169 /* flag options */
4170 IX86_ATTR_YES ("cld",
4171 OPT_mcld,
4172 MASK_CLD),
4173
4174 IX86_ATTR_NO ("fancy-math-387",
4175 OPT_mfancy_math_387,
4176 MASK_NO_FANCY_MATH_387),
4177
4178 IX86_ATTR_YES ("ieee-fp",
4179 OPT_mieee_fp,
4180 MASK_IEEE_FP),
4181
4182 IX86_ATTR_YES ("inline-all-stringops",
4183 OPT_minline_all_stringops,
4184 MASK_INLINE_ALL_STRINGOPS),
4185
4186 IX86_ATTR_YES ("inline-stringops-dynamically",
4187 OPT_minline_stringops_dynamically,
4188 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4189
4190 IX86_ATTR_NO ("align-stringops",
4191 OPT_mno_align_stringops,
4192 MASK_NO_ALIGN_STRINGOPS),
4193
4194 IX86_ATTR_YES ("recip",
4195 OPT_mrecip,
4196 MASK_RECIP),
4197
4198 };
4199
4200 /* If this is a list, recurse to get the options. */
4201 if (TREE_CODE (args) == TREE_LIST)
4202 {
4203 bool ret = true;
4204
4205 for (; args; args = TREE_CHAIN (args))
4206 if (TREE_VALUE (args)
4207 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4208 p_strings, enum_opts_set))
4209 ret = false;
4210
4211 return ret;
4212 }
4213
4214 else if (TREE_CODE (args) != STRING_CST)
4215 gcc_unreachable ();
4216
4217 /* Handle multiple arguments separated by commas. */
4218 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4219
4220 while (next_optstr && *next_optstr != '\0')
4221 {
4222 char *p = next_optstr;
4223 char *orig_p = p;
4224 char *comma = strchr (next_optstr, ',');
4225 const char *opt_string;
4226 size_t len, opt_len;
4227 int opt;
4228 bool opt_set_p;
4229 char ch;
4230 unsigned i;
4231 enum ix86_opt_type type = ix86_opt_unknown;
4232 int mask = 0;
4233
4234 if (comma)
4235 {
4236 *comma = '\0';
4237 len = comma - next_optstr;
4238 next_optstr = comma + 1;
4239 }
4240 else
4241 {
4242 len = strlen (p);
4243 next_optstr = NULL;
4244 }
4245
4246 /* Recognize no-xxx. */
4247 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4248 {
4249 opt_set_p = false;
4250 p += 3;
4251 len -= 3;
4252 }
4253 else
4254 opt_set_p = true;
4255
4256 /* Find the option. */
4257 ch = *p;
4258 opt = N_OPTS;
4259 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4260 {
4261 type = attrs[i].type;
4262 opt_len = attrs[i].len;
4263 if (ch == attrs[i].string[0]
4264 && ((type != ix86_opt_str && type != ix86_opt_enum)
4265 ? len == opt_len
4266 : len > opt_len)
4267 && memcmp (p, attrs[i].string, opt_len) == 0)
4268 {
4269 opt = attrs[i].opt;
4270 mask = attrs[i].mask;
4271 opt_string = attrs[i].string;
4272 break;
4273 }
4274 }
4275
4276 /* Process the option. */
4277 if (opt == N_OPTS)
4278 {
4279 error ("attribute(target(\"%s\")) is unknown", orig_p);
4280 ret = false;
4281 }
4282
4283 else if (type == ix86_opt_isa)
4284 {
4285 struct cl_decoded_option decoded;
4286
4287 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4288 ix86_handle_option (&global_options, &global_options_set,
4289 &decoded, input_location);
4290 }
4291
4292 else if (type == ix86_opt_yes || type == ix86_opt_no)
4293 {
4294 if (type == ix86_opt_no)
4295 opt_set_p = !opt_set_p;
4296
4297 if (opt_set_p)
4298 target_flags |= mask;
4299 else
4300 target_flags &= ~mask;
4301 }
4302
4303 else if (type == ix86_opt_str)
4304 {
4305 if (p_strings[opt])
4306 {
4307 error ("option(\"%s\") was already specified", opt_string);
4308 ret = false;
4309 }
4310 else
4311 p_strings[opt] = xstrdup (p + opt_len);
4312 }
4313
4314 else if (type == ix86_opt_enum)
4315 {
4316 bool arg_ok;
4317 int value;
4318
4319 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4320 if (arg_ok)
4321 set_option (&global_options, enum_opts_set, opt, value,
4322 p + opt_len, DK_UNSPECIFIED, input_location,
4323 global_dc);
4324 else
4325 {
4326 error ("attribute(target(\"%s\")) is unknown", orig_p);
4327 ret = false;
4328 }
4329 }
4330
4331 else
4332 gcc_unreachable ();
4333 }
4334
4335 return ret;
4336 }
4337
4338 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4339
4340 tree
4341 ix86_valid_target_attribute_tree (tree args)
4342 {
4343 const char *orig_arch_string = ix86_arch_string;
4344 const char *orig_tune_string = ix86_tune_string;
4345 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4346 int orig_tune_defaulted = ix86_tune_defaulted;
4347 int orig_arch_specified = ix86_arch_specified;
4348 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4349 tree t = NULL_TREE;
4350 int i;
4351 struct cl_target_option *def
4352 = TREE_TARGET_OPTION (target_option_default_node);
4353 struct gcc_options enum_opts_set;
4354
4355 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4356
4357 /* Process each of the options on the chain. */
4358 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4359 &enum_opts_set))
4360 return NULL_TREE;
4361
4362 /* If the changed options are different from the default, rerun
4363 ix86_option_override_internal, and then save the options away.
4364 The string options are attribute options, and will be undone
4365 when we copy the save structure. */
4366 if (ix86_isa_flags != def->x_ix86_isa_flags
4367 || target_flags != def->x_target_flags
4368 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4369 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4370 || enum_opts_set.x_ix86_fpmath)
4371 {
4372 /* If we are using the default tune= or arch=, undo the string assigned,
4373 and use the default. */
4374 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4375 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4376 else if (!orig_arch_specified)
4377 ix86_arch_string = NULL;
4378
4379 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4380 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4381 else if (orig_tune_defaulted)
4382 ix86_tune_string = NULL;
4383
4384 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4385 if (enum_opts_set.x_ix86_fpmath)
4386 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4387 else if (!TARGET_64BIT && TARGET_SSE)
4388 {
4389 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4390 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4391 }
4392
4393 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4394 ix86_option_override_internal (false);
4395
4396 /* Add any builtin functions with the new isa if any. */
4397 ix86_add_new_builtins (ix86_isa_flags);
4398
4399 /* Save the current options unless we are validating options for
4400 #pragma. */
4401 t = build_target_option_node ();
4402
4403 ix86_arch_string = orig_arch_string;
4404 ix86_tune_string = orig_tune_string;
4405 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4406
4407 /* Free up memory allocated to hold the strings */
4408 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4409 free (option_strings[i]);
4410 }
4411
4412 return t;
4413 }
4414
4415 /* Hook to validate attribute((target("string"))). */
4416
4417 static bool
4418 ix86_valid_target_attribute_p (tree fndecl,
4419 tree ARG_UNUSED (name),
4420 tree args,
4421 int ARG_UNUSED (flags))
4422 {
4423 struct cl_target_option cur_target;
4424 bool ret = true;
4425 tree old_optimize = build_optimization_node ();
4426 tree new_target, new_optimize;
4427 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4428
4429 /* If the function changed the optimization levels as well as setting target
4430 options, start with the optimizations specified. */
4431 if (func_optimize && func_optimize != old_optimize)
4432 cl_optimization_restore (&global_options,
4433 TREE_OPTIMIZATION (func_optimize));
4434
4435 /* The target attributes may also change some optimization flags, so update
4436 the optimization options if necessary. */
4437 cl_target_option_save (&cur_target, &global_options);
4438 new_target = ix86_valid_target_attribute_tree (args);
4439 new_optimize = build_optimization_node ();
4440
4441 if (!new_target)
4442 ret = false;
4443
4444 else if (fndecl)
4445 {
4446 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4447
4448 if (old_optimize != new_optimize)
4449 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4450 }
4451
4452 cl_target_option_restore (&global_options, &cur_target);
4453
4454 if (old_optimize != new_optimize)
4455 cl_optimization_restore (&global_options,
4456 TREE_OPTIMIZATION (old_optimize));
4457
4458 return ret;
4459 }
4460
4461 \f
4462 /* Hook to determine if one function can safely inline another. */
4463
4464 static bool
4465 ix86_can_inline_p (tree caller, tree callee)
4466 {
4467 bool ret = false;
4468 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4469 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4470
4471 /* If callee has no option attributes, then it is ok to inline. */
4472 if (!callee_tree)
4473 ret = true;
4474
4475 /* If caller has no option attributes, but callee does then it is not ok to
4476 inline. */
4477 else if (!caller_tree)
4478 ret = false;
4479
4480 else
4481 {
4482 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4483 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4484
4485 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
4486 function can inline an SSE2 function but an SSE2 function can't inline
4487 an SSE4 function. */
4488 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4489 != callee_opts->x_ix86_isa_flags)
4490 ret = false;
4491
4492 /* See if we have the same non-isa options. */
4493 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4494 ret = false;
4495
4496 /* See if arch, tune, etc. are the same. */
4497 else if (caller_opts->arch != callee_opts->arch)
4498 ret = false;
4499
4500 else if (caller_opts->tune != callee_opts->tune)
4501 ret = false;
4502
4503 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4504 ret = false;
4505
4506 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4507 ret = false;
4508
4509 else
4510 ret = true;
4511 }
4512
4513 return ret;
4514 }
4515
4516 \f
4517 /* Remember the last target of ix86_set_current_function. */
4518 static GTY(()) tree ix86_previous_fndecl;
4519
4520 /* Establish appropriate back-end context for processing the function
4521 FNDECL. The argument might be NULL to indicate processing at top
4522 level, outside of any function scope. */
4523 static void
4524 ix86_set_current_function (tree fndecl)
4525 {
4526 /* Only change the context if the function changes. This hook is called
4527 several times in the course of compiling a function, and we don't want to
4528 slow things down too much or call target_reinit when it isn't safe. */
4529 if (fndecl && fndecl != ix86_previous_fndecl)
4530 {
4531 tree old_tree = (ix86_previous_fndecl
4532 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4533 : NULL_TREE);
4534
4535 tree new_tree = (fndecl
4536 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4537 : NULL_TREE);
4538
4539 ix86_previous_fndecl = fndecl;
4540 if (old_tree == new_tree)
4541 ;
4542
4543 else if (new_tree)
4544 {
4545 cl_target_option_restore (&global_options,
4546 TREE_TARGET_OPTION (new_tree));
4547 target_reinit ();
4548 }
4549
4550 else if (old_tree)
4551 {
4552 struct cl_target_option *def
4553 = TREE_TARGET_OPTION (target_option_current_node);
4554
4555 cl_target_option_restore (&global_options, def);
4556 target_reinit ();
4557 }
4558 }
4559 }
4560
4561 \f
4562 /* Return true if this goes in large data/bss. */
4563
4564 static bool
4565 ix86_in_large_data_p (tree exp)
4566 {
4567 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4568 return false;
4569
4570 /* Functions are never large data. */
4571 if (TREE_CODE (exp) == FUNCTION_DECL)
4572 return false;
4573
4574 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4575 {
4576 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4577 if (strcmp (section, ".ldata") == 0
4578 || strcmp (section, ".lbss") == 0)
4579 return true;
4580 return false;
4581 }
4582 else
4583 {
4584 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4585
4586 /* If this is an incomplete type with size 0, then we can't put it
4587 in data because it might be too big when completed. */
4588 if (!size || size > ix86_section_threshold)
4589 return true;
4590 }
4591
4592 return false;
4593 }
4594
4595 /* Switch to the appropriate section for output of DECL.
4596 DECL is either a `VAR_DECL' node or a constant of some sort.
4597 RELOC indicates whether forming the initial value of DECL requires
4598 link-time relocations. */
4599
4600 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4601 ATTRIBUTE_UNUSED;
4602
4603 static section *
4604 x86_64_elf_select_section (tree decl, int reloc,
4605 unsigned HOST_WIDE_INT align)
4606 {
4607 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4608 && ix86_in_large_data_p (decl))
4609 {
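      /* Large objects go into ".l"-prefixed counterparts of the normal
         sections.  */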
4610 const char *sname = NULL;
4611 unsigned int flags = SECTION_WRITE;
4612 switch (categorize_decl_for_section (decl, reloc))
4613 {
4614 case SECCAT_DATA:
4615 sname = ".ldata";
4616 break;
4617 case SECCAT_DATA_REL:
4618 sname = ".ldata.rel";
4619 break;
4620 case SECCAT_DATA_REL_LOCAL:
4621 sname = ".ldata.rel.local";
4622 break;
4623 case SECCAT_DATA_REL_RO:
4624 sname = ".ldata.rel.ro";
4625 break;
4626 case SECCAT_DATA_REL_RO_LOCAL:
4627 sname = ".ldata.rel.ro.local";
4628 break;
4629 case SECCAT_BSS:
4630 sname = ".lbss";
4631 flags |= SECTION_BSS;
4632 break;
4633 case SECCAT_RODATA:
4634 case SECCAT_RODATA_MERGE_STR:
4635 case SECCAT_RODATA_MERGE_STR_INIT:
4636 case SECCAT_RODATA_MERGE_CONST:
4637 sname = ".lrodata";
4638 flags = 0;
4639 break;
4640 case SECCAT_SRODATA:
4641 case SECCAT_SDATA:
4642 case SECCAT_SBSS:
4643 gcc_unreachable ();
4644 case SECCAT_TEXT:
4645 case SECCAT_TDATA:
4646 case SECCAT_TBSS:
4647 /* We don't split these for the medium model. Place them into
4648 default sections and hope for the best. */
4649 break;
4650 }
4651 if (sname)
4652 {
4653 /* We might get called with string constants, but get_named_section
4654 doesn't like them as they are not DECLs. Also, we need to set
4655 flags in that case. */
4656 if (!DECL_P (decl))
4657 return get_section (sname, flags, NULL);
4658 return get_named_section (decl, sname, reloc);
4659 }
4660 }
4661 return default_elf_select_section (decl, reloc, align);
4662 }
4663
4664 /* Build up a unique section name, expressed as a
4665 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4666 RELOC indicates whether the initial value of EXP requires
4667 link-time relocations. */
4668
4669 static void ATTRIBUTE_UNUSED
4670 x86_64_elf_unique_section (tree decl, int reloc)
4671 {
4672 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4673 && ix86_in_large_data_p (decl))
4674 {
4675 const char *prefix = NULL;
4676 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4677 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4678
4679 switch (categorize_decl_for_section (decl, reloc))
4680 {
4681 case SECCAT_DATA:
4682 case SECCAT_DATA_REL:
4683 case SECCAT_DATA_REL_LOCAL:
4684 case SECCAT_DATA_REL_RO:
4685 case SECCAT_DATA_REL_RO_LOCAL:
4686 prefix = one_only ? ".ld" : ".ldata";
4687 break;
4688 case SECCAT_BSS:
4689 prefix = one_only ? ".lb" : ".lbss";
4690 break;
4691 case SECCAT_RODATA:
4692 case SECCAT_RODATA_MERGE_STR:
4693 case SECCAT_RODATA_MERGE_STR_INIT:
4694 case SECCAT_RODATA_MERGE_CONST:
4695 prefix = one_only ? ".lr" : ".lrodata";
4696 break;
4697 case SECCAT_SRODATA:
4698 case SECCAT_SDATA:
4699 case SECCAT_SBSS:
4700 gcc_unreachable ();
4701 case SECCAT_TEXT:
4702 case SECCAT_TDATA:
4703 case SECCAT_TBSS:
4704 /* We don't split these for the medium model. Place them into
4705 default sections and hope for the best. */
4706 break;
4707 }
4708 if (prefix)
4709 {
4710 const char *name, *linkonce;
4711 char *string;
4712
4713 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4714 name = targetm.strip_name_encoding (name);
4715
4716 /* If we're using one_only, then there needs to be a .gnu.linkonce
4717 prefix to the section name. */
4718 linkonce = one_only ? ".gnu.linkonce" : "";
4719
4720 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4721
4722 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4723 return;
4724 }
4725 }
4726 default_unique_section (decl, reloc);
4727 }
4728
4729 #ifdef COMMON_ASM_OP
4730 /* This says how to output assembler code to declare an
4731 uninitialized external linkage data object.
4732
4733 For medium-model x86-64 we need to use the .largecomm directive for
4734 large objects. */
4735 void
4736 x86_elf_aligned_common (FILE *file,
4737 const char *name, unsigned HOST_WIDE_INT size,
4738 int align)
4739 {
4740 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4741 && size > (unsigned int)ix86_section_threshold)
4742 fputs (".largecomm\t", file);
4743 else
4744 fputs (COMMON_ASM_OP, file);
4745 assemble_name (file, name);
4746 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4747 size, align / BITS_PER_UNIT);
4748 }
4749 #endif
4750
4751 /* Utility function for targets to use in implementing
4752 ASM_OUTPUT_ALIGNED_BSS. */
4753
4754 void
4755 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4756 const char *name, unsigned HOST_WIDE_INT size,
4757 int align)
4758 {
4759 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4760 && size > (unsigned int)ix86_section_threshold)
4761 switch_to_section (get_named_section (decl, ".lbss", 0));
4762 else
4763 switch_to_section (bss_section);
4764 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4765 #ifdef ASM_DECLARE_OBJECT_NAME
4766 last_assemble_variable_decl = decl;
4767 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4768 #else
4769 /* The standard thing is just to output a label for the object. */
4770 ASM_OUTPUT_LABEL (file, name);
4771 #endif /* ASM_DECLARE_OBJECT_NAME */
4772 ASM_OUTPUT_SKIP (file, size ? size : 1);
4773 }
4774 \f
4775 /* Decide whether we must probe the stack before any space allocation
4776 on this target. It's essentially TARGET_STACK_PROBE except when
4777 -fstack-check causes the stack to be already probed differently. */
4778
4779 bool
4780 ix86_target_stack_probe (void)
4781 {
4782 /* Do not probe the stack twice if static stack checking is enabled. */
4783 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4784 return false;
4785
4786 return TARGET_STACK_PROBE;
4787 }
4788 \f
4789 /* Decide whether we can make a sibling call to a function. DECL is the
4790 declaration of the function being targeted by the call and EXP is the
4791 CALL_EXPR representing the call. */
4792
4793 static bool
4794 ix86_function_ok_for_sibcall (tree decl, tree exp)
4795 {
4796 tree type, decl_or_type;
4797 rtx a, b;
4798
4799 /* If we are generating position-independent code, we cannot sibcall
4800 optimize any indirect call, or a direct call to a global function,
4801 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4802 if (!TARGET_MACHO
4803 && !TARGET_64BIT
4804 && flag_pic
4805 && (!decl || !targetm.binds_local_p (decl)))
4806 return false;
4807
4808 /* If we need to align the outgoing stack, then sibcalling would
4809 unalign the stack, which may break the called function. */
4810 if (ix86_minimum_incoming_stack_boundary (true)
4811 < PREFERRED_STACK_BOUNDARY)
4812 return false;
4813
4814 if (decl)
4815 {
4816 decl_or_type = decl;
4817 type = TREE_TYPE (decl);
4818 }
4819 else
4820 {
4821 /* We're looking at the CALL_EXPR, we need the type of the function. */
4822 type = CALL_EXPR_FN (exp); /* pointer expression */
4823 type = TREE_TYPE (type); /* pointer type */
4824 type = TREE_TYPE (type); /* function type */
4825 decl_or_type = type;
4826 }
4827
4828 /* Check that the return value locations are the same. For example,
4829 if we are returning floats on the 80387 register stack, we cannot
4830 make a sibcall from a function that doesn't return a float to a
4831 function that does or, conversely, from a function that does return
4832 a float to a function that doesn't; the necessary stack adjustment
4833 would not be executed. This is also the place where we notice
4834 differences in the return value ABI. Note that it is OK for one
4835 of the functions to have a void return type as long as the return
4836 value of the other is passed in a register. */
4837 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4838 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4839 cfun->decl, false);
4840 if (STACK_REG_P (a) || STACK_REG_P (b))
4841 {
4842 if (!rtx_equal_p (a, b))
4843 return false;
4844 }
4845 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4846 {
4847 /* Disable sibcall if we need to generate vzeroupper after
4848 callee returns. */
4849 if (TARGET_VZEROUPPER
4850 && cfun->machine->callee_return_avx256_p
4851 && !cfun->machine->caller_return_avx256_p)
4852 return false;
4853 }
4854 else if (!rtx_equal_p (a, b))
4855 return false;
4856
4857 if (TARGET_64BIT)
4858 {
4859 /* The SYSV ABI has more call-clobbered registers;
4860 disallow sibcalls from MS to SYSV. */
4861 if (cfun->machine->call_abi == MS_ABI
4862 && ix86_function_type_abi (type) == SYSV_ABI)
4863 return false;
4864 }
4865 else
4866 {
4867 /* If this call is indirect, we'll need to be able to use a
4868 call-clobbered register for the address of the target function.
4869 Make sure that all such registers are not used for passing
4870 parameters. Note that DLLIMPORT functions are indirect. */
4871 if (!decl
4872 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4873 {
4874 if (ix86_function_regparm (type, NULL) >= 3)
4875 {
4876 /* ??? Need to count the actual number of registers to be used,
4877 not the possible number of registers. Fix later. */
4878 return false;
4879 }
4880 }
4881 }
4882
4883 /* Otherwise okay. That also includes certain types of indirect calls. */
4884 return true;
4885 }
4886
4887 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4888 and "sseregparm" calling convention attributes;
4889 arguments as in struct attribute_spec.handler. */
4890
4891 static tree
4892 ix86_handle_cconv_attribute (tree *node, tree name,
4893 tree args,
4894 int flags ATTRIBUTE_UNUSED,
4895 bool *no_add_attrs)
4896 {
4897 if (TREE_CODE (*node) != FUNCTION_TYPE
4898 && TREE_CODE (*node) != METHOD_TYPE
4899 && TREE_CODE (*node) != FIELD_DECL
4900 && TREE_CODE (*node) != TYPE_DECL)
4901 {
4902 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4903 name);
4904 *no_add_attrs = true;
4905 return NULL_TREE;
4906 }
4907
4908 /* Can combine regparm with all attributes but fastcall and thiscall. */
4909 if (is_attribute_p ("regparm", name))
4910 {
4911 tree cst;
4912
4913 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4914 {
4915 error ("fastcall and regparm attributes are not compatible");
4916 }
4917
4918 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4919 {
4920 error ("regparam and thiscall attributes are not compatible");
4921 }
4922
4923 cst = TREE_VALUE (args);
4924 if (TREE_CODE (cst) != INTEGER_CST)
4925 {
4926 warning (OPT_Wattributes,
4927 "%qE attribute requires an integer constant argument",
4928 name);
4929 *no_add_attrs = true;
4930 }
4931 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4932 {
4933 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4934 name, REGPARM_MAX);
4935 *no_add_attrs = true;
4936 }
4937
4938 return NULL_TREE;
4939 }
4940
4941 if (TARGET_64BIT)
4942 {
4943 /* Do not warn when emulating the MS ABI. */
4944 if ((TREE_CODE (*node) != FUNCTION_TYPE
4945 && TREE_CODE (*node) != METHOD_TYPE)
4946 || ix86_function_type_abi (*node) != MS_ABI)
4947 warning (OPT_Wattributes, "%qE attribute ignored",
4948 name);
4949 *no_add_attrs = true;
4950 return NULL_TREE;
4951 }
4952
4953 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4954 if (is_attribute_p ("fastcall", name))
4955 {
4956 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4957 {
4958 error ("fastcall and cdecl attributes are not compatible");
4959 }
4960 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4961 {
4962 error ("fastcall and stdcall attributes are not compatible");
4963 }
4964 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4965 {
4966 error ("fastcall and regparm attributes are not compatible");
4967 }
4968 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4969 {
4970 error ("fastcall and thiscall attributes are not compatible");
4971 }
4972 }
4973
4974 /* Can combine stdcall with fastcall (redundant), regparm and
4975 sseregparm. */
4976 else if (is_attribute_p ("stdcall", name))
4977 {
4978 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4979 {
4980 error ("stdcall and cdecl attributes are not compatible");
4981 }
4982 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4983 {
4984 error ("stdcall and fastcall attributes are not compatible");
4985 }
4986 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4987 {
4988 error ("stdcall and thiscall attributes are not compatible");
4989 }
4990 }
4991
4992 /* Can combine cdecl with regparm and sseregparm. */
4993 else if (is_attribute_p ("cdecl", name))
4994 {
4995 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4996 {
4997 error ("stdcall and cdecl attributes are not compatible");
4998 }
4999 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5000 {
5001 error ("fastcall and cdecl attributes are not compatible");
5002 }
5003 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5004 {
5005 error ("cdecl and thiscall attributes are not compatible");
5006 }
5007 }
5008 else if (is_attribute_p ("thiscall", name))
5009 {
5010 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5011 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5012 name);
5013 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5014 {
5015 error ("stdcall and thiscall attributes are not compatible");
5016 }
5017 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5018 {
5019 error ("fastcall and thiscall attributes are not compatible");
5020 }
5021 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5022 {
5023 error ("cdecl and thiscall attributes are not compatible");
5024 }
5025 }
5026
5027 /* Can combine sseregparm with all attributes. */
5028
5029 return NULL_TREE;
5030 }
5031
5032 /* This function determines the calling convention from TYPE. */
5033
5034 unsigned int
5035 ix86_get_callcvt (const_tree type)
5036 {
5037 unsigned int ret = 0;
5038 bool is_stdarg;
5039 tree attrs;
5040
5041 if (TARGET_64BIT)
5042 return IX86_CALLCVT_CDECL;
5043
5044 attrs = TYPE_ATTRIBUTES (type);
5045 if (attrs != NULL_TREE)
5046 {
5047 if (lookup_attribute ("cdecl", attrs))
5048 ret |= IX86_CALLCVT_CDECL;
5049 else if (lookup_attribute ("stdcall", attrs))
5050 ret |= IX86_CALLCVT_STDCALL;
5051 else if (lookup_attribute ("fastcall", attrs))
5052 ret |= IX86_CALLCVT_FASTCALL;
5053 else if (lookup_attribute ("thiscall", attrs))
5054 ret |= IX86_CALLCVT_THISCALL;
5055
5056 /* Regparm isn't allowed for thiscall and fastcall. */
5057 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5058 {
5059 if (lookup_attribute ("regparm", attrs))
5060 ret |= IX86_CALLCVT_REGPARM;
5061 if (lookup_attribute ("sseregparm", attrs))
5062 ret |= IX86_CALLCVT_SSEREGPARM;
5063 }
5064
5065 if (IX86_BASE_CALLCVT (ret) != 0)
5066 return ret;
5067 }
5068
5069 is_stdarg = stdarg_p (type);
5070 if (TARGET_RTD && !is_stdarg)
5071 return IX86_CALLCVT_STDCALL | ret;
5072
5073 if (ret != 0
5074 || is_stdarg
5075 || TREE_CODE (type) != METHOD_TYPE
5076 || ix86_function_type_abi (type) != MS_ABI)
5077 return IX86_CALLCVT_CDECL | ret;
5078
5079 return IX86_CALLCVT_THISCALL;
5080 }
5081
5082 /* Return 0 if the attributes for two types are incompatible, 1 if they
5083 are compatible, and 2 if they are nearly compatible (which causes a
5084 warning to be generated). */
5085
5086 static int
5087 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5088 {
5089 unsigned int ccvt1, ccvt2;
5090
5091 if (TREE_CODE (type1) != FUNCTION_TYPE
5092 && TREE_CODE (type1) != METHOD_TYPE)
5093 return 1;
5094
5095 ccvt1 = ix86_get_callcvt (type1);
5096 ccvt2 = ix86_get_callcvt (type2);
5097 if (ccvt1 != ccvt2)
5098 return 0;
5099 if (ix86_function_regparm (type1, NULL)
5100 != ix86_function_regparm (type2, NULL))
5101 return 0;
5102
5103 return 1;
5104 }
5105 \f
5106 /* Return the regparm value for a function with the indicated TYPE and DECL.
5107 DECL may be NULL when calling the function indirectly
5108 or when considering a libcall. */
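/* For example, a function declared
       int __attribute__ ((regparm (3))) f (int a, int b, int c);
   passes A, B and C in %eax, %edx and %ecx respectively; fastcall
   implies two register arguments (%ecx, %edx) and thiscall one (%ecx),
   matching the values returned below. */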
5109
5110 static int
5111 ix86_function_regparm (const_tree type, const_tree decl)
5112 {
5113 tree attr;
5114 int regparm;
5115 unsigned int ccvt;
5116
5117 if (TARGET_64BIT)
5118 return (ix86_function_type_abi (type) == SYSV_ABI
5119 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5120 ccvt = ix86_get_callcvt (type);
5121 regparm = ix86_regparm;
5122
5123 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5124 {
5125 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5126 if (attr)
5127 {
5128 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5129 return regparm;
5130 }
5131 }
5132 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5133 return 2;
5134 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5135 return 1;
5136
5137 /* Use register calling convention for local functions when possible. */
5138 if (decl
5139 && TREE_CODE (decl) == FUNCTION_DECL
5140 && optimize
5141 && !(profile_flag && !flag_fentry))
5142 {
5143 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5144 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5145 if (i && i->local && i->can_change_signature)
5146 {
5147 int local_regparm, globals = 0, regno;
5148
5149 /* Make sure no regparm register is taken by a
5150 fixed register variable. */
5151 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5152 if (fixed_regs[local_regparm])
5153 break;
5154
5155 /* We don't want to use regparm(3) for nested functions as
5156 these use a static chain pointer in the third argument. */
5157 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5158 local_regparm = 2;
5159
5160 /* In 32-bit mode save a register for the split stack. */
5161 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5162 local_regparm = 2;
5163
5164 /* Each fixed register usage increases register pressure,
5165 so fewer registers should be used for argument passing.
5166 This functionality can be overridden by an explicit
5167 regparm value. */
5168 for (regno = 0; regno <= DI_REG; regno++)
5169 if (fixed_regs[regno])
5170 globals++;
5171
5172 local_regparm
5173 = globals < local_regparm ? local_regparm - globals : 0;
5174
5175 if (local_regparm > regparm)
5176 regparm = local_regparm;
5177 }
5178 }
5179
5180 return regparm;
5181 }
5182
5183 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5184 DFmode (2) arguments in SSE registers for a function with the
5185 indicated TYPE and DECL. DECL may be NULL when calling the function
5186 indirectly or when considering a libcall. Otherwise return 0. */
5187
5188 static int
5189 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5190 {
5191 gcc_assert (!TARGET_64BIT);
5192
5193 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5194 by the sseregparm attribute. */
5195 if (TARGET_SSEREGPARM
5196 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5197 {
5198 if (!TARGET_SSE)
5199 {
5200 if (warn)
5201 {
5202 if (decl)
5203 error ("calling %qD with attribute sseregparm without "
5204 "SSE/SSE2 enabled", decl);
5205 else
5206 error ("calling %qT with attribute sseregparm without "
5207 "SSE/SSE2 enabled", type);
5208 }
5209 return 0;
5210 }
5211
5212 return 2;
5213 }
5214
5215 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5216 (and DFmode for SSE2) arguments in SSE registers. */
5217 if (decl && TARGET_SSE_MATH && optimize
5218 && !(profile_flag && !flag_fentry))
5219 {
5220 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5221 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5222 if (i && i->local && i->can_change_signature)
5223 return TARGET_SSE2 ? 2 : 1;
5224 }
5225
5226 return 0;
5227 }
5228
5229 /* Return true if EAX is live at the start of the function. Used by
5230 ix86_expand_prologue to determine if we need special help before
5231 calling allocate_stack_worker. */
5232
5233 static bool
5234 ix86_eax_live_at_start_p (void)
5235 {
5236 /* Cheat. Don't bother working forward from ix86_function_regparm
5237 to the function type to whether an actual argument is located in
5238 eax. Instead just look at cfg info, which is still close enough
5239 to correct at this point. This gives false positives for broken
5240 functions that might use uninitialized data that happens to be
5241 allocated in eax, but who cares? */
5242 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5243 }
5244
5245 static bool
5246 ix86_keep_aggregate_return_pointer (tree fntype)
5247 {
5248 tree attr;
5249
5250 if (!TARGET_64BIT)
5251 {
5252 attr = lookup_attribute ("callee_pop_aggregate_return",
5253 TYPE_ATTRIBUTES (fntype));
5254 if (attr)
5255 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5256
5257 /* For the 32-bit MS ABI the default is to keep the aggregate
5258 return pointer. */
5259 if (ix86_function_type_abi (fntype) == MS_ABI)
5260 return true;
5261 }
5262 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5263 }
5264
5265 /* Value is the number of bytes of arguments automatically
5266 popped when returning from a subroutine call.
5267 FUNDECL is the declaration node of the function (as a tree),
5268 FUNTYPE is the data type of the function (as a tree),
5269 or for a library call it is an identifier node for the subroutine name.
5270 SIZE is the number of bytes of arguments passed on the stack.
5271
5272 On the 80386, the RTD insn may be used to pop them if the number
5273 of args is fixed, but if the number is variable then the caller
5274 must pop them all. RTD can't be used for library calls now
5275 because the library is compiled with the Unix compiler.
5276 Use of RTD is a selectable option, since it is incompatible with
5277 standard Unix calling sequences. If the option is not selected,
5278 the caller must always pop the args.
5279
5280 The attribute stdcall is equivalent to RTD on a per module basis. */
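/* For example, on ia32 a function declared
       void __attribute__ ((stdcall)) f (int a, int b);
   has the callee pop its 8 bytes of stack arguments (ret $8), so this
   hook returns 8, whereas for the default cdecl convention it returns 0
   and the caller pops the arguments. */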
5281
5282 static int
5283 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5284 {
5285 unsigned int ccvt;
5286
5287 /* None of the 64-bit ABIs pop arguments. */
5288 if (TARGET_64BIT)
5289 return 0;
5290
5291 ccvt = ix86_get_callcvt (funtype);
5292
5293 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5294 | IX86_CALLCVT_THISCALL)) != 0
5295 && ! stdarg_p (funtype))
5296 return size;
5297
5298 /* Lose any fake structure return argument if it is passed on the stack. */
5299 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5300 && !ix86_keep_aggregate_return_pointer (funtype))
5301 {
5302 int nregs = ix86_function_regparm (funtype, fundecl);
5303 if (nregs == 0)
5304 return GET_MODE_SIZE (Pmode);
5305 }
5306
5307 return 0;
5308 }
5309 \f
5310 /* Argument support functions. */
5311
5312 /* Return true when a register may be used to pass function parameters. */
5313 bool
5314 ix86_function_arg_regno_p (int regno)
5315 {
5316 int i;
5317 const int *parm_regs;
5318
5319 if (!TARGET_64BIT)
5320 {
5321 if (TARGET_MACHO)
5322 return (regno < REGPARM_MAX
5323 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5324 else
5325 return (regno < REGPARM_MAX
5326 || (TARGET_MMX && MMX_REGNO_P (regno)
5327 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5328 || (TARGET_SSE && SSE_REGNO_P (regno)
5329 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5330 }
5331
5332 if (TARGET_MACHO)
5333 {
5334 if (SSE_REGNO_P (regno) && TARGET_SSE)
5335 return true;
5336 }
5337 else
5338 {
5339 if (TARGET_SSE && SSE_REGNO_P (regno)
5340 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5341 return true;
5342 }
5343
5344 /* TODO: The function should depend on current function ABI but
5345 builtins.c would need updating then. Therefore we use the
5346 default ABI. */
5347
5348 /* RAX is used as hidden argument to va_arg functions. */
5349 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5350 return true;
5351
5352 if (ix86_abi == MS_ABI)
5353 parm_regs = x86_64_ms_abi_int_parameter_registers;
5354 else
5355 parm_regs = x86_64_int_parameter_registers;
5356 for (i = 0; i < (ix86_abi == MS_ABI
5357 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5358 if (regno == parm_regs[i])
5359 return true;
5360 return false;
5361 }
5362
5363 /* Return true if we do not know how to pass TYPE solely in registers. */
5364
5365 static bool
5366 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5367 {
5368 if (must_pass_in_stack_var_size_or_pad (mode, type))
5369 return true;
5370
5371 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5372 The layout_type routine is crafty and tries to trick us into passing
5373 currently unsupported vector types on the stack by using TImode. */
5374 return (!TARGET_64BIT && mode == TImode
5375 && type && TREE_CODE (type) != VECTOR_TYPE);
5376 }
5377
5378 /* Return the size, in bytes, of the area reserved for arguments passed
5379 in registers for the function represented by FNDECL, depending on the
5380 ABI used. */
5381 int
5382 ix86_reg_parm_stack_space (const_tree fndecl)
5383 {
5384 enum calling_abi call_abi = SYSV_ABI;
5385 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5386 call_abi = ix86_function_abi (fndecl);
5387 else
5388 call_abi = ix86_function_type_abi (fndecl);
5389 if (TARGET_64BIT && call_abi == MS_ABI)
5390 return 32;
5391 return 0;
5392 }
5393
5394 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5395 calling ABI used. */
5396 enum calling_abi
5397 ix86_function_type_abi (const_tree fntype)
5398 {
5399 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5400 {
5401 enum calling_abi abi = ix86_abi;
5402 if (abi == SYSV_ABI)
5403 {
5404 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5405 abi = MS_ABI;
5406 }
5407 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5408 abi = SYSV_ABI;
5409 return abi;
5410 }
5411 return ix86_abi;
5412 }
5413
5414 static bool
5415 ix86_function_ms_hook_prologue (const_tree fn)
5416 {
5417 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5418 {
5419 if (decl_function_context (fn) != NULL_TREE)
5420 error_at (DECL_SOURCE_LOCATION (fn),
5421 "ms_hook_prologue is not compatible with nested function");
5422 else
5423 return true;
5424 }
5425 return false;
5426 }
5427
5428 static enum calling_abi
5429 ix86_function_abi (const_tree fndecl)
5430 {
5431 if (! fndecl)
5432 return ix86_abi;
5433 return ix86_function_type_abi (TREE_TYPE (fndecl));
5434 }
5435
5436 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
5437 calling ABI used. */
5438 enum calling_abi
5439 ix86_cfun_abi (void)
5440 {
5441 if (! cfun)
5442 return ix86_abi;
5443 return cfun->machine->call_abi;
5444 }
5445
5446 /* Write the extra assembler code needed to declare a function properly. */
5447
5448 void
5449 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5450 tree decl)
5451 {
5452 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5453
5454 if (is_ms_hook)
5455 {
5456 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5457 unsigned int filler_cc = 0xcccccccc;
5458
5459 for (i = 0; i < filler_count; i += 4)
5460 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5461 }
5462
5463 #ifdef SUBTARGET_ASM_UNWIND_INIT
5464 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5465 #endif
5466
5467 ASM_OUTPUT_LABEL (asm_out_file, fname);
5468
5469 /* Output magic byte marker, if hot-patch attribute is set. */
5470 if (is_ms_hook)
5471 {
5472 if (TARGET_64BIT)
5473 {
5474 /* leaq [%rsp + 0], %rsp */
5475 asm_fprintf (asm_out_file, ASM_BYTE
5476 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5477 }
5478 else
5479 {
5480 /* movl.s %edi, %edi
5481 push %ebp
5482 movl.s %esp, %ebp */
5483 asm_fprintf (asm_out_file, ASM_BYTE
5484 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5485 }
5486 }
5487 }
5488
5489 /* regclass.c */
5490 extern void init_regs (void);
5491
5492 /* Implementation of the call ABI switching target hook. The call
5493 register sets specific to FNDECL are set up here. See also
5494 ix86_conditional_register_usage for more details. */
5495 void
5496 ix86_call_abi_override (const_tree fndecl)
5497 {
5498 if (fndecl == NULL_TREE)
5499 cfun->machine->call_abi = ix86_abi;
5500 else
5501 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5502 }
5503
5504 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
5505 Avoid expensive re-initialization of init_regs each time we switch
5506 function context, since this is needed only during RTL expansion. */
5507 static void
5508 ix86_maybe_switch_abi (void)
5509 {
5510 if (TARGET_64BIT
5511 && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5512 reinit_regs ();
5513 }
5514
5515 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5516 for a call to a function whose data type is FNTYPE.
5517 For a library call, FNTYPE is 0. */
5518
5519 void
5520 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5521 tree fntype, /* tree ptr for function decl */
5522 rtx libname, /* SYMBOL_REF of library name or 0 */
5523 tree fndecl,
5524 int caller)
5525 {
5526 struct cgraph_local_info *i;
5527 tree fnret_type;
5528
5529 memset (cum, 0, sizeof (*cum));
5530
5531 /* Initialize for the current callee. */
5532 if (caller)
5533 {
5534 cfun->machine->callee_pass_avx256_p = false;
5535 cfun->machine->callee_return_avx256_p = false;
5536 }
5537
5538 if (fndecl)
5539 {
5540 i = cgraph_local_info (fndecl);
5541 cum->call_abi = ix86_function_abi (fndecl);
5542 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5543 }
5544 else
5545 {
5546 i = NULL;
5547 cum->call_abi = ix86_function_type_abi (fntype);
5548 if (fntype)
5549 fnret_type = TREE_TYPE (fntype);
5550 else
5551 fnret_type = NULL;
5552 }
5553
5554 if (TARGET_VZEROUPPER && fnret_type)
5555 {
5556 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5557 false);
5558 if (function_pass_avx256_p (fnret_value))
5559 {
5560 /* The return value of this function uses 256bit AVX modes. */
5561 if (caller)
5562 cfun->machine->callee_return_avx256_p = true;
5563 else
5564 cfun->machine->caller_return_avx256_p = true;
5565 }
5566 }
5567
5568 cum->caller = caller;
5569
5570 /* Set up the number of registers to use for passing arguments. */
5571
5572 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5573 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5574 "or subtarget optimization implying it");
5575 cum->nregs = ix86_regparm;
5576 if (TARGET_64BIT)
5577 {
5578 cum->nregs = (cum->call_abi == SYSV_ABI
5579 ? X86_64_REGPARM_MAX
5580 : X86_64_MS_REGPARM_MAX);
5581 }
5582 if (TARGET_SSE)
5583 {
5584 cum->sse_nregs = SSE_REGPARM_MAX;
5585 if (TARGET_64BIT)
5586 {
5587 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5588 ? X86_64_SSE_REGPARM_MAX
5589 : X86_64_MS_SSE_REGPARM_MAX);
5590 }
5591 }
5592 if (TARGET_MMX)
5593 cum->mmx_nregs = MMX_REGPARM_MAX;
5594 cum->warn_avx = true;
5595 cum->warn_sse = true;
5596 cum->warn_mmx = true;
5597
5598 /* Because types might mismatch between caller and callee, we need to
5599 use the actual type of the function for local calls.
5600 FIXME: cgraph_analyze can be told to actually record if a function uses
5601 va_start, so for local functions maybe_vaarg can be made more aggressive,
5602 helping K&R code.
5603 FIXME: once the type system is fixed, we won't need this code anymore. */
5604 if (i && i->local && i->can_change_signature)
5605 fntype = TREE_TYPE (fndecl);
5606 cum->maybe_vaarg = (fntype
5607 ? (!prototype_p (fntype) || stdarg_p (fntype))
5608 : !libname);
5609
5610 if (!TARGET_64BIT)
5611 {
5612 /* If there are variable arguments, then we won't pass anything
5613 in registers in 32-bit mode. */
5614 if (stdarg_p (fntype))
5615 {
5616 cum->nregs = 0;
5617 cum->sse_nregs = 0;
5618 cum->mmx_nregs = 0;
5619 cum->warn_avx = 0;
5620 cum->warn_sse = 0;
5621 cum->warn_mmx = 0;
5622 return;
5623 }
5624
5625 /* Use ecx and edx registers if function has fastcall attribute,
5626 else look for regparm information. */
5627 if (fntype)
5628 {
5629 unsigned int ccvt = ix86_get_callcvt (fntype);
5630 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5631 {
5632 cum->nregs = 1;
5633 cum->fastcall = 1; /* Same first register as in fastcall. */
5634 }
5635 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5636 {
5637 cum->nregs = 2;
5638 cum->fastcall = 1;
5639 }
5640 else
5641 cum->nregs = ix86_function_regparm (fntype, fndecl);
5642 }
5643
5644 /* Set up the number of SSE registers used for passing SFmode
5645 and DFmode arguments. Warn for mismatching ABI. */
5646 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5647 }
5648 }
5649
5650 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5651 But in the case of vector types, it is some vector mode.
5652
5653 When we have only some of our vector isa extensions enabled, then there
5654 are some modes for which vector_mode_supported_p is false. For these
5655 modes, the generic vector support in gcc will choose some non-vector mode
5656 in order to implement the type. By computing the natural mode, we'll
5657 select the proper ABI location for the operand and not depend on whatever
5658 the middle-end decides to do with these vector types.
5659
5660 The middle-end can't deal with vector types larger than 16 bytes. In this
5661 case, we return the original mode and warn about the ABI change if CUM isn't
5662 NULL. */
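/* Illustrative example: with -mno-avx a 32-byte vector type such as
       typedef float v8sf __attribute__ ((vector_size (32)));
   keeps its TYPE_MODE and triggers the ABI warning below, while a
   16-byte
       typedef float v4sf __attribute__ ((vector_size (16)));
   is given the natural mode V4SFmode even if SSE is disabled and the
   middle-end laid the type out in an integer mode. */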
5663
5664 static enum machine_mode
5665 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5666 {
5667 enum machine_mode mode = TYPE_MODE (type);
5668
5669 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5670 {
5671 HOST_WIDE_INT size = int_size_in_bytes (type);
5672 if ((size == 8 || size == 16 || size == 32)
5673 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5674 && TYPE_VECTOR_SUBPARTS (type) > 1)
5675 {
5676 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5677
5678 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5679 mode = MIN_MODE_VECTOR_FLOAT;
5680 else
5681 mode = MIN_MODE_VECTOR_INT;
5682
5683 /* Get the mode which has this inner mode and number of units. */
5684 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5685 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5686 && GET_MODE_INNER (mode) == innermode)
5687 {
5688 if (size == 32 && !TARGET_AVX)
5689 {
5690 static bool warnedavx;
5691
5692 if (cum
5693 && !warnedavx
5694 && cum->warn_avx)
5695 {
5696 warnedavx = true;
5697 warning (0, "AVX vector argument without AVX "
5698 "enabled changes the ABI");
5699 }
5700 return TYPE_MODE (type);
5701 }
5702 else
5703 return mode;
5704 }
5705
5706 gcc_unreachable ();
5707 }
5708 }
5709
5710 return mode;
5711 }
5712
5713 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5714 this may not agree with the mode that the type system has chosen for the
5715 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5716 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5717
5718 static rtx
5719 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5720 unsigned int regno)
5721 {
5722 rtx tmp;
5723
5724 if (orig_mode != BLKmode)
5725 tmp = gen_rtx_REG (orig_mode, regno);
5726 else
5727 {
5728 tmp = gen_rtx_REG (mode, regno);
5729 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5730 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5731 }
5732
5733 return tmp;
5734 }
5735
5736 /* x86-64 register passing implementation. See the x86-64 ABI for details.
5737 The goal of this code is to classify each eightbyte of an incoming argument
5738 by register class and assign registers accordingly. */
5739
5740 /* Return the union class of CLASS1 and CLASS2.
5741 See the x86-64 PS ABI for details. */
5742
5743 static enum x86_64_reg_class
5744 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5745 {
5746 /* Rule #1: If both classes are equal, this is the resulting class. */
5747 if (class1 == class2)
5748 return class1;
5749
5750 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5751 the other class. */
5752 if (class1 == X86_64_NO_CLASS)
5753 return class2;
5754 if (class2 == X86_64_NO_CLASS)
5755 return class1;
5756
5757 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5758 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5759 return X86_64_MEMORY_CLASS;
5760
5761 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5762 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5763 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5764 return X86_64_INTEGERSI_CLASS;
5765 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5766 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5767 return X86_64_INTEGER_CLASS;
5768
5769 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5770 MEMORY is used. */
5771 if (class1 == X86_64_X87_CLASS
5772 || class1 == X86_64_X87UP_CLASS
5773 || class1 == X86_64_COMPLEX_X87_CLASS
5774 || class2 == X86_64_X87_CLASS
5775 || class2 == X86_64_X87UP_CLASS
5776 || class2 == X86_64_COMPLEX_X87_CLASS)
5777 return X86_64_MEMORY_CLASS;
5778
5779 /* Rule #6: Otherwise class SSE is used. */
5780 return X86_64_SSE_CLASS;
5781 }
5782
5783 /* Classify the argument of type TYPE and mode MODE.
5784 CLASSES will be filled by the register class used to pass each word
5785 of the operand. The number of words is returned. In case the parameter
5786 should be passed in memory, 0 is returned. As a special case for zero
5787 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5788
5789 BIT_OFFSET is used internally for handling records and specifies the
5790 offset, in bits modulo 256, of the argument to avoid overflow cases.
5791
5792 See the x86-64 PS ABI for details.
5793 */
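/* Worked example of the classification below: the 16-byte
       struct s { double d; long l; };
   occupies two eightbytes; the first classifies as X86_64_SSEDF_CLASS
   and the second as X86_64_INTEGER_CLASS, so the struct is passed in
   one SSE register and one integer register. A 24-byte struct of
   three longs fails the "larger than 16 bytes must start with SSE"
   check below and is passed in memory (return value 0). */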
5794
5795 static int
5796 classify_argument (enum machine_mode mode, const_tree type,
5797 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5798 {
5799 HOST_WIDE_INT bytes =
5800 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5801 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5802
5803 /* Variable sized entities are always passed/returned in memory. */
5804 if (bytes < 0)
5805 return 0;
5806
5807 if (mode != VOIDmode
5808 && targetm.calls.must_pass_in_stack (mode, type))
5809 return 0;
5810
5811 if (type && AGGREGATE_TYPE_P (type))
5812 {
5813 int i;
5814 tree field;
5815 enum x86_64_reg_class subclasses[MAX_CLASSES];
5816
5817 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5818 if (bytes > 32)
5819 return 0;
5820
5821 for (i = 0; i < words; i++)
5822 classes[i] = X86_64_NO_CLASS;
5823
5824 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
5825 signal the memory class, so handle this as a special case. */
5826 if (!words)
5827 {
5828 classes[0] = X86_64_NO_CLASS;
5829 return 1;
5830 }
5831
5832 /* Classify each field of record and merge classes. */
5833 switch (TREE_CODE (type))
5834 {
5835 case RECORD_TYPE:
5836 /* And now merge the fields of structure. */
5837 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5838 {
5839 if (TREE_CODE (field) == FIELD_DECL)
5840 {
5841 int num;
5842
5843 if (TREE_TYPE (field) == error_mark_node)
5844 continue;
5845
5846 /* Bitfields are always classified as integer. Handle them
5847 early, since later code would consider them to be
5848 misaligned integers. */
5849 if (DECL_BIT_FIELD (field))
5850 {
5851 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5852 i < ((int_bit_position (field) + (bit_offset % 64))
5853 + tree_low_cst (DECL_SIZE (field), 0)
5854 + 63) / 8 / 8; i++)
5855 classes[i] =
5856 merge_classes (X86_64_INTEGER_CLASS,
5857 classes[i]);
5858 }
5859 else
5860 {
5861 int pos;
5862
5863 type = TREE_TYPE (field);
5864
5865 /* Flexible array member is ignored. */
5866 if (TYPE_MODE (type) == BLKmode
5867 && TREE_CODE (type) == ARRAY_TYPE
5868 && TYPE_SIZE (type) == NULL_TREE
5869 && TYPE_DOMAIN (type) != NULL_TREE
5870 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5871 == NULL_TREE))
5872 {
5873 static bool warned;
5874
5875 if (!warned && warn_psabi)
5876 {
5877 warned = true;
5878 inform (input_location,
5879 "the ABI of passing struct with"
5880 " a flexible array member has"
5881 " changed in GCC 4.4");
5882 }
5883 continue;
5884 }
5885 num = classify_argument (TYPE_MODE (type), type,
5886 subclasses,
5887 (int_bit_position (field)
5888 + bit_offset) % 256);
5889 if (!num)
5890 return 0;
5891 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5892 for (i = 0; i < num && (i + pos) < words; i++)
5893 classes[i + pos] =
5894 merge_classes (subclasses[i], classes[i + pos]);
5895 }
5896 }
5897 }
5898 break;
5899
5900 case ARRAY_TYPE:
5901 /* Arrays are handled as small records. */
5902 {
5903 int num;
5904 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5905 TREE_TYPE (type), subclasses, bit_offset);
5906 if (!num)
5907 return 0;
5908
5909 /* The partial classes are now full classes. */
5910 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5911 subclasses[0] = X86_64_SSE_CLASS;
5912 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5913 && !((bit_offset % 64) == 0 && bytes == 4))
5914 subclasses[0] = X86_64_INTEGER_CLASS;
5915
5916 for (i = 0; i < words; i++)
5917 classes[i] = subclasses[i % num];
5918
5919 break;
5920 }
5921 case UNION_TYPE:
5922 case QUAL_UNION_TYPE:
5923 /* Unions are similar to RECORD_TYPE but offset is always 0. */
5925 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5926 {
5927 if (TREE_CODE (field) == FIELD_DECL)
5928 {
5929 int num;
5930
5931 if (TREE_TYPE (field) == error_mark_node)
5932 continue;
5933
5934 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5935 TREE_TYPE (field), subclasses,
5936 bit_offset);
5937 if (!num)
5938 return 0;
5939 for (i = 0; i < num; i++)
5940 classes[i] = merge_classes (subclasses[i], classes[i]);
5941 }
5942 }
5943 break;
5944
5945 default:
5946 gcc_unreachable ();
5947 }
5948
5949 if (words > 2)
5950 {
5951 /* When the size is larger than 16 bytes, if the first class isn't
5952 X86_64_SSE_CLASS or any of the remaining classes isn't
5953 X86_64_SSEUP_CLASS, everything should be passed in
5954 memory. */
5955 if (classes[0] != X86_64_SSE_CLASS)
5956 return 0;
5957
5958 for (i = 1; i < words; i++)
5959 if (classes[i] != X86_64_SSEUP_CLASS)
5960 return 0;
5961 }
5962
5963 /* Final merger cleanup. */
5964 for (i = 0; i < words; i++)
5965 {
5966 /* If one class is MEMORY, everything should be passed in
5967 memory. */
5968 if (classes[i] == X86_64_MEMORY_CLASS)
5969 return 0;
5970
5971 /* The X86_64_SSEUP_CLASS should always be preceded by
5972 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5973 if (classes[i] == X86_64_SSEUP_CLASS
5974 && classes[i - 1] != X86_64_SSE_CLASS
5975 && classes[i - 1] != X86_64_SSEUP_CLASS)
5976 {
5977 /* The first one should never be X86_64_SSEUP_CLASS. */
5978 gcc_assert (i != 0);
5979 classes[i] = X86_64_SSE_CLASS;
5980 }
5981
5982 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5983 everything should be passed in memory. */
5984 if (classes[i] == X86_64_X87UP_CLASS
5985 && (classes[i - 1] != X86_64_X87_CLASS))
5986 {
5987 static bool warned;
5988
5989 /* The first one should never be X86_64_X87UP_CLASS. */
5990 gcc_assert (i != 0);
5991 if (!warned && warn_psabi)
5992 {
5993 warned = true;
5994 inform (input_location,
5995 "the ABI of passing union with long double"
5996 " has changed in GCC 4.4");
5997 }
5998 return 0;
5999 }
6000 }
6001 return words;
6002 }
6003
6004 /* Compute the alignment needed. We align all types to their natural
6005 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6006 if (mode != VOIDmode && mode != BLKmode)
6007 {
6008 int mode_alignment = GET_MODE_BITSIZE (mode);
6009
6010 if (mode == XFmode)
6011 mode_alignment = 128;
6012 else if (mode == XCmode)
6013 mode_alignment = 256;
6014 if (COMPLEX_MODE_P (mode))
6015 mode_alignment /= 2;
6016 /* Misaligned fields are always returned in memory. */
6017 if (bit_offset % mode_alignment)
6018 return 0;
6019 }
6020
6021 /* For V1xx modes, just use the base mode. */
6022 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6023 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6024 mode = GET_MODE_INNER (mode);
6025
6026 /* Classification of atomic types. */
6027 switch (mode)
6028 {
6029 case SDmode:
6030 case DDmode:
6031 classes[0] = X86_64_SSE_CLASS;
6032 return 1;
6033 case TDmode:
6034 classes[0] = X86_64_SSE_CLASS;
6035 classes[1] = X86_64_SSEUP_CLASS;
6036 return 2;
6037 case DImode:
6038 case SImode:
6039 case HImode:
6040 case QImode:
6041 case CSImode:
6042 case CHImode:
6043 case CQImode:
6044 {
6045 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6046
6047 if (size <= 32)
6048 {
6049 classes[0] = X86_64_INTEGERSI_CLASS;
6050 return 1;
6051 }
6052 else if (size <= 64)
6053 {
6054 classes[0] = X86_64_INTEGER_CLASS;
6055 return 1;
6056 }
6057 else if (size <= 64+32)
6058 {
6059 classes[0] = X86_64_INTEGER_CLASS;
6060 classes[1] = X86_64_INTEGERSI_CLASS;
6061 return 2;
6062 }
6063 else if (size <= 64+64)
6064 {
6065 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6066 return 2;
6067 }
6068 else
6069 gcc_unreachable ();
6070 }
6071 case CDImode:
6072 case TImode:
6073 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6074 return 2;
6075 case COImode:
6076 case OImode:
6077 /* OImode shouldn't be used directly. */
6078 gcc_unreachable ();
6079 case CTImode:
6080 return 0;
6081 case SFmode:
6082 if (!(bit_offset % 64))
6083 classes[0] = X86_64_SSESF_CLASS;
6084 else
6085 classes[0] = X86_64_SSE_CLASS;
6086 return 1;
6087 case DFmode:
6088 classes[0] = X86_64_SSEDF_CLASS;
6089 return 1;
6090 case XFmode:
6091 classes[0] = X86_64_X87_CLASS;
6092 classes[1] = X86_64_X87UP_CLASS;
6093 return 2;
6094 case TFmode:
6095 classes[0] = X86_64_SSE_CLASS;
6096 classes[1] = X86_64_SSEUP_CLASS;
6097 return 2;
6098 case SCmode:
6099 classes[0] = X86_64_SSE_CLASS;
6100 if (!(bit_offset % 64))
6101 return 1;
6102 else
6103 {
6104 static bool warned;
6105
6106 if (!warned && warn_psabi)
6107 {
6108 warned = true;
6109 inform (input_location,
6110 "the ABI of passing structure with complex float"
6111 " member has changed in GCC 4.4");
6112 }
6113 classes[1] = X86_64_SSESF_CLASS;
6114 return 2;
6115 }
6116 case DCmode:
6117 classes[0] = X86_64_SSEDF_CLASS;
6118 classes[1] = X86_64_SSEDF_CLASS;
6119 return 2;
6120 case XCmode:
6121 classes[0] = X86_64_COMPLEX_X87_CLASS;
6122 return 1;
6123 case TCmode:
6124 /* This mode is larger than 16 bytes. */
6125 return 0;
6126 case V8SFmode:
6127 case V8SImode:
6128 case V32QImode:
6129 case V16HImode:
6130 case V4DFmode:
6131 case V4DImode:
6132 classes[0] = X86_64_SSE_CLASS;
6133 classes[1] = X86_64_SSEUP_CLASS;
6134 classes[2] = X86_64_SSEUP_CLASS;
6135 classes[3] = X86_64_SSEUP_CLASS;
6136 return 4;
6137 case V4SFmode:
6138 case V4SImode:
6139 case V16QImode:
6140 case V8HImode:
6141 case V2DFmode:
6142 case V2DImode:
6143 classes[0] = X86_64_SSE_CLASS;
6144 classes[1] = X86_64_SSEUP_CLASS;
6145 return 2;
6146 case V1TImode:
6147 case V1DImode:
6148 case V2SFmode:
6149 case V2SImode:
6150 case V4HImode:
6151 case V8QImode:
6152 classes[0] = X86_64_SSE_CLASS;
6153 return 1;
6154 case BLKmode:
6155 case VOIDmode:
6156 return 0;
6157 default:
6158 gcc_assert (VECTOR_MODE_P (mode));
6159
6160 if (bytes > 16)
6161 return 0;
6162
6163 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6164
6165 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6166 classes[0] = X86_64_INTEGERSI_CLASS;
6167 else
6168 classes[0] = X86_64_INTEGER_CLASS;
6169 classes[1] = X86_64_INTEGER_CLASS;
6170 return 1 + (bytes > 8);
6171 }
6172 }
6173
6174 /* Examine the argument and set the number of registers required in each
6175 class. Return 0 iff the parameter should be passed in memory. */
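/* For instance, an __m128 argument (V4SFmode) classifies as
   { SSE, SSEUP }, yielding *INT_NREGS = 0 and *SSE_NREGS = 1, while
   the struct { double d; long l; } example above yields one of each. */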
6176 static int
6177 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6178 int *int_nregs, int *sse_nregs)
6179 {
6180 enum x86_64_reg_class regclass[MAX_CLASSES];
6181 int n = classify_argument (mode, type, regclass, 0);
6182
6183 *int_nregs = 0;
6184 *sse_nregs = 0;
6185 if (!n)
6186 return 0;
6187 for (n--; n >= 0; n--)
6188 switch (regclass[n])
6189 {
6190 case X86_64_INTEGER_CLASS:
6191 case X86_64_INTEGERSI_CLASS:
6192 (*int_nregs)++;
6193 break;
6194 case X86_64_SSE_CLASS:
6195 case X86_64_SSESF_CLASS:
6196 case X86_64_SSEDF_CLASS:
6197 (*sse_nregs)++;
6198 break;
6199 case X86_64_NO_CLASS:
6200 case X86_64_SSEUP_CLASS:
6201 break;
6202 case X86_64_X87_CLASS:
6203 case X86_64_X87UP_CLASS:
6204 if (!in_return)
6205 return 0;
6206 break;
6207 case X86_64_COMPLEX_X87_CLASS:
6208 return in_return ? 2 : 0;
6209 case X86_64_MEMORY_CLASS:
6210 gcc_unreachable ();
6211 }
6212 return 1;
6213 }
6214
6215 /* Construct container for the argument used by GCC interface. See
6216 FUNCTION_ARG for the detailed description. */
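/* Sketch of the result for the struct { double d; long l; } example
   above when passed as an argument under the SysV ABI: none of the
   simple cases below applies, so a PARALLEL is built, roughly
       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])
   i.e. the first eightbyte goes in %xmm0 and the second in %rdi. */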
6217
6218 static rtx
6219 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6220 const_tree type, int in_return, int nintregs, int nsseregs,
6221 const int *intreg, int sse_regno)
6222 {
6223 /* The following variables hold the static issued_error state. */
6224 static bool issued_sse_arg_error;
6225 static bool issued_sse_ret_error;
6226 static bool issued_x87_ret_error;
6227
6228 enum machine_mode tmpmode;
6229 int bytes =
6230 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6231 enum x86_64_reg_class regclass[MAX_CLASSES];
6232 int n;
6233 int i;
6234 int nexps = 0;
6235 int needed_sseregs, needed_intregs;
6236 rtx exp[MAX_CLASSES];
6237 rtx ret;
6238
6239 n = classify_argument (mode, type, regclass, 0);
6240 if (!n)
6241 return NULL;
6242 if (!examine_argument (mode, type, in_return, &needed_intregs,
6243 &needed_sseregs))
6244 return NULL;
6245 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6246 return NULL;
6247
6248 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6249 some less clueful developer tries to use floating-point anyway. */
6250 if (needed_sseregs && !TARGET_SSE)
6251 {
6252 if (in_return)
6253 {
6254 if (!issued_sse_ret_error)
6255 {
6256 error ("SSE register return with SSE disabled");
6257 issued_sse_ret_error = true;
6258 }
6259 }
6260 else if (!issued_sse_arg_error)
6261 {
6262 error ("SSE register argument with SSE disabled");
6263 issued_sse_arg_error = true;
6264 }
6265 return NULL;
6266 }
6267
6268 /* Likewise, error if the ABI requires us to return values in the
6269 x87 registers and the user specified -mno-80387. */
6270 if (!TARGET_80387 && in_return)
6271 for (i = 0; i < n; i++)
6272 if (regclass[i] == X86_64_X87_CLASS
6273 || regclass[i] == X86_64_X87UP_CLASS
6274 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6275 {
6276 if (!issued_x87_ret_error)
6277 {
6278 error ("x87 register return with x87 disabled");
6279 issued_x87_ret_error = true;
6280 }
6281 return NULL;
6282 }
6283
6284 /* First construct simple cases. Avoid SCmode, since we want to use
6285 a single register to pass this type. */
6286 if (n == 1 && mode != SCmode)
6287 switch (regclass[0])
6288 {
6289 case X86_64_INTEGER_CLASS:
6290 case X86_64_INTEGERSI_CLASS:
6291 return gen_rtx_REG (mode, intreg[0]);
6292 case X86_64_SSE_CLASS:
6293 case X86_64_SSESF_CLASS:
6294 case X86_64_SSEDF_CLASS:
6295 if (mode != BLKmode)
6296 return gen_reg_or_parallel (mode, orig_mode,
6297 SSE_REGNO (sse_regno));
6298 break;
6299 case X86_64_X87_CLASS:
6300 case X86_64_COMPLEX_X87_CLASS:
6301 return gen_rtx_REG (mode, FIRST_STACK_REG);
6302 case X86_64_NO_CLASS:
6303 /* Zero sized array, struct or class. */
6304 return NULL;
6305 default:
6306 gcc_unreachable ();
6307 }
6308 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6309 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6310 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6311 if (n == 4
6312 && regclass[0] == X86_64_SSE_CLASS
6313 && regclass[1] == X86_64_SSEUP_CLASS
6314 && regclass[2] == X86_64_SSEUP_CLASS
6315 && regclass[3] == X86_64_SSEUP_CLASS
6316 && mode != BLKmode)
6317 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6318
6319 if (n == 2
6320 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6321 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6322 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6323 && regclass[1] == X86_64_INTEGER_CLASS
6324 && (mode == CDImode || mode == TImode || mode == TFmode)
6325 && intreg[0] + 1 == intreg[1])
6326 return gen_rtx_REG (mode, intreg[0]);
6327
6328 /* Otherwise figure out the entries of the PARALLEL. */
6329 for (i = 0; i < n; i++)
6330 {
6331 int pos;
6332
6333 switch (regclass[i])
6334 {
6335 case X86_64_NO_CLASS:
6336 break;
6337 case X86_64_INTEGER_CLASS:
6338 case X86_64_INTEGERSI_CLASS:
6339 /* Merge TImodes on aligned occasions here too. */
6340 if (i * 8 + 8 > bytes)
6341 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6342 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6343 tmpmode = SImode;
6344 else
6345 tmpmode = DImode;
6346 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6347 if (tmpmode == BLKmode)
6348 tmpmode = DImode;
6349 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6350 gen_rtx_REG (tmpmode, *intreg),
6351 GEN_INT (i*8));
6352 intreg++;
6353 break;
6354 case X86_64_SSESF_CLASS:
6355 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6356 gen_rtx_REG (SFmode,
6357 SSE_REGNO (sse_regno)),
6358 GEN_INT (i*8));
6359 sse_regno++;
6360 break;
6361 case X86_64_SSEDF_CLASS:
6362 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6363 gen_rtx_REG (DFmode,
6364 SSE_REGNO (sse_regno)),
6365 GEN_INT (i*8));
6366 sse_regno++;
6367 break;
6368 case X86_64_SSE_CLASS:
6369 pos = i;
6370 switch (n)
6371 {
6372 case 1:
6373 tmpmode = DImode;
6374 break;
6375 case 2:
6376 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6377 {
6378 tmpmode = TImode;
6379 i++;
6380 }
6381 else
6382 tmpmode = DImode;
6383 break;
6384 case 4:
6385 gcc_assert (i == 0
6386 && regclass[1] == X86_64_SSEUP_CLASS
6387 && regclass[2] == X86_64_SSEUP_CLASS
6388 && regclass[3] == X86_64_SSEUP_CLASS);
6389 tmpmode = OImode;
6390 i += 3;
6391 break;
6392 default:
6393 gcc_unreachable ();
6394 }
6395 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6396 gen_rtx_REG (tmpmode,
6397 SSE_REGNO (sse_regno)),
6398 GEN_INT (pos*8));
6399 sse_regno++;
6400 break;
6401 default:
6402 gcc_unreachable ();
6403 }
6404 }
6405
6406 /* Empty aligned struct, union or class. */
6407 if (nexps == 0)
6408 return NULL;
6409
6410 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6411 for (i = 0; i < nexps; i++)
6412 XVECEXP (ret, 0, i) = exp [i];
6413 return ret;
6414 }
6415
6416 /* Update the data in CUM to advance over an argument of mode MODE
6417 and data type TYPE. (TYPE is null for libcalls where that information
6418 may not be available.) */
6419
6420 static void
6421 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6422 const_tree type, HOST_WIDE_INT bytes,
6423 HOST_WIDE_INT words)
6424 {
6425 switch (mode)
6426 {
6427 default:
6428 break;
6429
6430 case BLKmode:
6431 if (bytes < 0)
6432 break;
6433 /* FALLTHRU */
6434
6435 case DImode:
6436 case SImode:
6437 case HImode:
6438 case QImode:
6439 cum->words += words;
6440 cum->nregs -= words;
6441 cum->regno += words;
6442
6443 if (cum->nregs <= 0)
6444 {
6445 cum->nregs = 0;
6446 cum->regno = 0;
6447 }
6448 break;
6449
6450 case OImode:
6451 /* OImode shouldn't be used directly. */
6452 gcc_unreachable ();
6453
6454 case DFmode:
6455 if (cum->float_in_sse < 2)
6456 break;
6457 case SFmode:
6458 if (cum->float_in_sse < 1)
6459 break;
6460 /* FALLTHRU */
6461
6462 case V8SFmode:
6463 case V8SImode:
6464 case V32QImode:
6465 case V16HImode:
6466 case V4DFmode:
6467 case V4DImode:
6468 case TImode:
6469 case V16QImode:
6470 case V8HImode:
6471 case V4SImode:
6472 case V2DImode:
6473 case V4SFmode:
6474 case V2DFmode:
6475 if (!type || !AGGREGATE_TYPE_P (type))
6476 {
6477 cum->sse_words += words;
6478 cum->sse_nregs -= 1;
6479 cum->sse_regno += 1;
6480 if (cum->sse_nregs <= 0)
6481 {
6482 cum->sse_nregs = 0;
6483 cum->sse_regno = 0;
6484 }
6485 }
6486 break;
6487
6488 case V8QImode:
6489 case V4HImode:
6490 case V2SImode:
6491 case V2SFmode:
6492 case V1TImode:
6493 case V1DImode:
6494 if (!type || !AGGREGATE_TYPE_P (type))
6495 {
6496 cum->mmx_words += words;
6497 cum->mmx_nregs -= 1;
6498 cum->mmx_regno += 1;
6499 if (cum->mmx_nregs <= 0)
6500 {
6501 cum->mmx_nregs = 0;
6502 cum->mmx_regno = 0;
6503 }
6504 }
6505 break;
6506 }
6507 }
6508
6509 static void
6510 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6511 const_tree type, HOST_WIDE_INT words, bool named)
6512 {
6513 int int_nregs, sse_nregs;
6514
6515 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6516 if (!named && VALID_AVX256_REG_MODE (mode))
6517 return;
6518
6519 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6520 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6521 {
6522 cum->nregs -= int_nregs;
6523 cum->sse_nregs -= sse_nregs;
6524 cum->regno += int_nregs;
6525 cum->sse_regno += sse_nregs;
6526 }
6527 else
6528 {
6529 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6530 cum->words = (cum->words + align - 1) & ~(align - 1);
6531 cum->words += words;
6532 }
6533 }
6534
6535 static void
6536 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6537 HOST_WIDE_INT words)
6538 {
6539 /* Otherwise, this should be passed indirectly. */
6540 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6541
6542 cum->words += words;
6543 if (cum->nregs > 0)
6544 {
6545 cum->nregs -= 1;
6546 cum->regno += 1;
6547 }
6548 }
6549
6550 /* Update the data in CUM to advance over an argument of mode MODE and
6551 data type TYPE. (TYPE is null for libcalls where that information
6552 may not be available.) */
6553
6554 static void
6555 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6556 const_tree type, bool named)
6557 {
6558 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6559 HOST_WIDE_INT bytes, words;
6560
6561 if (mode == BLKmode)
6562 bytes = int_size_in_bytes (type);
6563 else
6564 bytes = GET_MODE_SIZE (mode);
6565 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6566
6567 if (type)
6568 mode = type_natural_mode (type, NULL);
6569
6570 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6571 function_arg_advance_ms_64 (cum, bytes, words);
6572 else if (TARGET_64BIT)
6573 function_arg_advance_64 (cum, mode, type, words, named);
6574 else
6575 function_arg_advance_32 (cum, mode, type, bytes, words);
6576 }
6577
6578 /* Define where to put the arguments to a function.
6579 Value is zero to push the argument on the stack,
6580 or a hard register in which to store the argument.
6581
6582 MODE is the argument's machine mode.
6583 TYPE is the data type of the argument (as a tree).
6584 This is null for libcalls where that information may
6585 not be available.
6586 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6587 the preceding args and about the function being called.
6588 NAMED is nonzero if this argument is a named parameter
6589 (otherwise it is an extra parameter matching an ellipsis). */
6590
6591 static rtx
6592 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6593 enum machine_mode orig_mode, const_tree type,
6594 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6595 {
6596 static bool warnedsse, warnedmmx;
6597
6598 /* Avoid the AL settings for the Unix64 ABI. */
6599 if (mode == VOIDmode)
6600 return constm1_rtx;
6601
6602 switch (mode)
6603 {
6604 default:
6605 break;
6606
6607 case BLKmode:
6608 if (bytes < 0)
6609 break;
6610 /* FALLTHRU */
6611 case DImode:
6612 case SImode:
6613 case HImode:
6614 case QImode:
6615 if (words <= cum->nregs)
6616 {
6617 int regno = cum->regno;
6618
6619 /* Fastcall allocates the first two DWORD (SImode) or
6620 smaller arguments to ECX and EDX if it isn't an
6621 aggregate type. */
6622 if (cum->fastcall)
6623 {
6624 if (mode == BLKmode
6625 || mode == DImode
6626 || (type && AGGREGATE_TYPE_P (type)))
6627 break;
6628
6629 /* ECX not EAX is the first allocated register. */
6630 if (regno == AX_REG)
6631 regno = CX_REG;
6632 }
6633 return gen_rtx_REG (mode, regno);
6634 }
6635 break;
6636
6637 case DFmode:
6638 if (cum->float_in_sse < 2)
6639 break;
6640 case SFmode:
6641 if (cum->float_in_sse < 1)
6642 break;
6643 /* FALLTHRU */
6644 case TImode:
6645 /* In 32bit, we pass TImode in xmm registers. */
6646 case V16QImode:
6647 case V8HImode:
6648 case V4SImode:
6649 case V2DImode:
6650 case V4SFmode:
6651 case V2DFmode:
6652 if (!type || !AGGREGATE_TYPE_P (type))
6653 {
6654 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6655 {
6656 warnedsse = true;
6657 warning (0, "SSE vector argument without SSE enabled "
6658 "changes the ABI");
6659 }
6660 if (cum->sse_nregs)
6661 return gen_reg_or_parallel (mode, orig_mode,
6662 cum->sse_regno + FIRST_SSE_REG);
6663 }
6664 break;
6665
6666 case OImode:
6667 /* OImode shouldn't be used directly. */
6668 gcc_unreachable ();
6669
6670 case V8SFmode:
6671 case V8SImode:
6672 case V32QImode:
6673 case V16HImode:
6674 case V4DFmode:
6675 case V4DImode:
6676 if (!type || !AGGREGATE_TYPE_P (type))
6677 {
6678 if (cum->sse_nregs)
6679 return gen_reg_or_parallel (mode, orig_mode,
6680 cum->sse_regno + FIRST_SSE_REG);
6681 }
6682 break;
6683
6684 case V8QImode:
6685 case V4HImode:
6686 case V2SImode:
6687 case V2SFmode:
6688 case V1TImode:
6689 case V1DImode:
6690 if (!type || !AGGREGATE_TYPE_P (type))
6691 {
6692 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6693 {
6694 warnedmmx = true;
6695 warning (0, "MMX vector argument without MMX enabled "
6696 "changes the ABI");
6697 }
6698 if (cum->mmx_nregs)
6699 return gen_reg_or_parallel (mode, orig_mode,
6700 cum->mmx_regno + FIRST_MMX_REG);
6701 }
6702 break;
6703 }
6704
6705 return NULL_RTX;
6706 }
6707
6708 static rtx
6709 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6710 enum machine_mode orig_mode, const_tree type, bool named)
6711 {
6712 /* Handle a hidden AL argument containing the number of registers
6713 for varargs x86-64 functions. */
6714 if (mode == VOIDmode)
6715 return GEN_INT (cum->maybe_vaarg
6716 ? (cum->sse_nregs < 0
6717 ? X86_64_SSE_REGPARM_MAX
6718 : cum->sse_regno)
6719 : -1);
6720
6721 switch (mode)
6722 {
6723 default:
6724 break;
6725
6726 case V8SFmode:
6727 case V8SImode:
6728 case V32QImode:
6729 case V16HImode:
6730 case V4DFmode:
6731 case V4DImode:
6732 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6733 if (!named)
6734 return NULL;
6735 break;
6736 }
6737
6738 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6739 cum->sse_nregs,
6740 &x86_64_int_parameter_registers [cum->regno],
6741 cum->sse_regno);
6742 }
6743
6744 static rtx
6745 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6746 enum machine_mode orig_mode, bool named,
6747 HOST_WIDE_INT bytes)
6748 {
6749 unsigned int regno;
6750
6751 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6752 We use the value -2 to specify that the current function call is MSABI. */
6753 if (mode == VOIDmode)
6754 return GEN_INT (-2);
6755
6756 /* If we've run out of registers, it goes on the stack. */
6757 if (cum->nregs == 0)
6758 return NULL_RTX;
6759
6760 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6761
6762 /* Only floating point modes are passed in anything but integer regs. */
6763 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6764 {
6765 if (named)
6766 regno = cum->regno + FIRST_SSE_REG;
6767 else
6768 {
6769 rtx t1, t2;
6770
6771 /* Unnamed floating parameters are passed in both the
6772 SSE and integer registers. */
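/* E.g. for an unnamed double in the second argument slot of a
   varargs call the value is duplicated into %xmm1 and %rdx, so the
   callee can pick it up from either register file. */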
6773 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6774 t2 = gen_rtx_REG (mode, regno);
6775 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6776 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6777 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6778 }
6779 }
6780 /* Handle aggregate types passed in registers. */
6781 if (orig_mode == BLKmode)
6782 {
6783 if (bytes > 0 && bytes <= 8)
6784 mode = (bytes > 4 ? DImode : SImode);
6785 if (mode == BLKmode)
6786 mode = DImode;
6787 }
6788
6789 return gen_reg_or_parallel (mode, orig_mode, regno);
6790 }
6791
6792 /* Return where to put the arguments to a function.
6793 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6794
6795 MODE is the argument's machine mode. TYPE is the data type of the
6796 argument. It is null for libcalls where that information may not be
6797 available. CUM gives information about the preceding args and about
6798 the function being called. NAMED is nonzero if this argument is a
6799 named parameter (otherwise it is an extra parameter matching an
6800 ellipsis). */
6801
6802 static rtx
6803 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6804 const_tree type, bool named)
6805 {
6806 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6807 enum machine_mode mode = omode;
6808 HOST_WIDE_INT bytes, words;
6809 rtx arg;
6810
6811 if (mode == BLKmode)
6812 bytes = int_size_in_bytes (type);
6813 else
6814 bytes = GET_MODE_SIZE (mode);
6815 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6816
6817 /* To simplify the code below, represent vector types with a vector mode
6818 even if MMX/SSE are not active. */
6819 if (type && TREE_CODE (type) == VECTOR_TYPE)
6820 mode = type_natural_mode (type, cum);
6821
6822 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6823 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6824 else if (TARGET_64BIT)
6825 arg = function_arg_64 (cum, mode, omode, type, named);
6826 else
6827 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6828
6829 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6830 {
6831 /* This argument uses 256bit AVX modes. */
6832 if (cum->caller)
6833 cfun->machine->callee_pass_avx256_p = true;
6834 else
6835 cfun->machine->caller_pass_avx256_p = true;
6836 }
6837
6838 return arg;
6839 }
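
/* For illustration (hypothetical prototype, not taken from the sources):
   on a 64-bit SysV target the dispatch above means that for

       extern void f (int i, double d, __m128 v);

   I is classified into the first free integer register (%rdi), D into the
   first free SSE register (%xmm0) and V into the next SSE register (%xmm1);
   once CUM runs out of the corresponding registers the helpers return
   NULL_RTX and the argument is pushed on the stack instead.  */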
6840
6841 /* A C expression that indicates when an argument must be passed by
6842 reference. If nonzero for an argument, a copy of that argument is
6843 made in memory and a pointer to the argument is passed instead of
6844 the argument itself. The pointer is passed in whatever way is
6845 appropriate for passing a pointer to that type. */
6846
6847 static bool
6848 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6849 enum machine_mode mode ATTRIBUTE_UNUSED,
6850 const_tree type, bool named ATTRIBUTE_UNUSED)
6851 {
6852 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6853
6854 /* See Windows x64 Software Convention. */
6855 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6856 {
6857 int msize = (int) GET_MODE_SIZE (mode);
6858 if (type)
6859 {
6860 /* Arrays are passed by reference. */
6861 if (TREE_CODE (type) == ARRAY_TYPE)
6862 return true;
6863
6864 if (AGGREGATE_TYPE_P (type))
6865 {
6866 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6867 are passed by reference. */
6868 msize = int_size_in_bytes (type);
6869 }
6870 }
6871
6872 /* __m128 is passed by reference. */
6873 switch (msize) {
6874 case 1: case 2: case 4: case 8:
6875 break;
6876 default:
6877 return true;
6878 }
6879 }
6880 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6881 return true;
6882
6883 return false;
6884 }
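
/* For illustration, the Win64 rules implemented above on a few hypothetical
   parameter types:

       struct s4  { int a; };          4 bytes  -> passed by value
       struct s8  { int a, b; };       8 bytes  -> passed by value
       struct s12 { int a, b, c; };   12 bytes  -> copy made, pointer passed
       int a[4];                      array     -> copy made, pointer passed
       __m128 v;                      16 bytes  -> copy made, pointer passed

   Only objects of exactly 1, 2, 4 or 8 bytes travel by value under MS_ABI;
   everything else goes by reference.  */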
6885
6886 /* Return true when TYPE should be 128bit aligned for 32bit argument
6887 passing ABI. XXX: This function is obsolete and is only used for
6888 checking psABI compatibility with previous versions of GCC. */
6889
6890 static bool
6891 ix86_compat_aligned_value_p (const_tree type)
6892 {
6893 enum machine_mode mode = TYPE_MODE (type);
6894 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6895 || mode == TDmode
6896 || mode == TFmode
6897 || mode == TCmode)
6898 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6899 return true;
6900 if (TYPE_ALIGN (type) < 128)
6901 return false;
6902
6903 if (AGGREGATE_TYPE_P (type))
6904 {
6905 /* Walk the aggregates recursively. */
6906 switch (TREE_CODE (type))
6907 {
6908 case RECORD_TYPE:
6909 case UNION_TYPE:
6910 case QUAL_UNION_TYPE:
6911 {
6912 tree field;
6913
6914 /* Walk all the structure fields. */
6915 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6916 {
6917 if (TREE_CODE (field) == FIELD_DECL
6918 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6919 return true;
6920 }
6921 break;
6922 }
6923
6924 case ARRAY_TYPE:
6925 /* Just for use if some languages pass arrays by value. */
6926 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6927 return true;
6928 break;
6929
6930 default:
6931 gcc_unreachable ();
6932 }
6933 }
6934 return false;
6935 }
6936
6937 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6938 XXX: This function is obsolete and is only used for checking psABI
6939 compatibility with previous versions of GCC. */
6940
6941 static unsigned int
6942 ix86_compat_function_arg_boundary (enum machine_mode mode,
6943 const_tree type, unsigned int align)
6944 {
6945 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6946 natural boundaries. */
6947 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6948 {
6949 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6950 make an exception for SSE modes since these require 128bit
6951 alignment.
6952
6953 The handling here differs from field_alignment. ICC aligns MMX
6954 arguments to 4 byte boundaries, while structure fields are aligned
6955 to 8 byte boundaries. */
6956 if (!type)
6957 {
6958 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6959 align = PARM_BOUNDARY;
6960 }
6961 else
6962 {
6963 if (!ix86_compat_aligned_value_p (type))
6964 align = PARM_BOUNDARY;
6965 }
6966 }
6967 if (align > BIGGEST_ALIGNMENT)
6968 align = BIGGEST_ALIGNMENT;
6969 return align;
6970 }
6971
6972 /* Return true when TYPE should be 128bit aligned for 32bit argument
6973 passing ABI. */
6974
6975 static bool
6976 ix86_contains_aligned_value_p (const_tree type)
6977 {
6978 enum machine_mode mode = TYPE_MODE (type);
6979
6980 if (mode == XFmode || mode == XCmode)
6981 return false;
6982
6983 if (TYPE_ALIGN (type) < 128)
6984 return false;
6985
6986 if (AGGREGATE_TYPE_P (type))
6987 {
6988 /* Walk the aggregates recursively. */
6989 switch (TREE_CODE (type))
6990 {
6991 case RECORD_TYPE:
6992 case UNION_TYPE:
6993 case QUAL_UNION_TYPE:
6994 {
6995 tree field;
6996
6997 /* Walk all the structure fields. */
6998 for (field = TYPE_FIELDS (type);
6999 field;
7000 field = DECL_CHAIN (field))
7001 {
7002 if (TREE_CODE (field) == FIELD_DECL
7003 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7004 return true;
7005 }
7006 break;
7007 }
7008
7009 case ARRAY_TYPE:
7010 /* Just for use if some languages pass arrays by value. */
7011 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7012 return true;
7013 break;
7014
7015 default:
7016 gcc_unreachable ();
7017 }
7018 }
7019 else
7020 return TYPE_ALIGN (type) >= 128;
7021
7022 return false;
7023 }
7024
7025 /* Gives the alignment boundary, in bits, of an argument with the
7026 specified mode and type. */
7027
7028 static unsigned int
7029 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7030 {
7031 unsigned int align;
7032 if (type)
7033 {
7034 /* Since the main variant type is what is used for the call, convert
7035 TYPE to its main variant. */
7036 type = TYPE_MAIN_VARIANT (type);
7037 align = TYPE_ALIGN (type);
7038 }
7039 else
7040 align = GET_MODE_ALIGNMENT (mode);
7041 if (align < PARM_BOUNDARY)
7042 align = PARM_BOUNDARY;
7043 else
7044 {
7045 static bool warned;
7046 unsigned int saved_align = align;
7047
7048 if (!TARGET_64BIT)
7049 {
7050 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7051 if (!type)
7052 {
7053 if (mode == XFmode || mode == XCmode)
7054 align = PARM_BOUNDARY;
7055 }
7056 else if (!ix86_contains_aligned_value_p (type))
7057 align = PARM_BOUNDARY;
7058
7059 if (align < 128)
7060 align = PARM_BOUNDARY;
7061 }
7062
7063 if (warn_psabi
7064 && !warned
7065 && align != ix86_compat_function_arg_boundary (mode, type,
7066 saved_align))
7067 {
7068 warned = true;
7069 inform (input_location,
7070 "The ABI for passing parameters with %d-byte"
7071 " alignment has changed in GCC 4.6",
7072 align / BITS_PER_UNIT);
7073 }
7074 }
7075
7076 return align;
7077 }
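
/* For illustration (hypothetical argument types, default options), the
   boundaries this returns on a 32-bit target are roughly:

       int          -> 32   (PARM_BOUNDARY)
       double       -> 32   (everything defaults to the 4-byte boundary)
       long double  -> 32   (XFmode is explicitly 4-byte aligned)
       __m128       -> 128  (the type contains a 128-bit aligned value)

   On 64-bit targets the result is simply TYPE_ALIGN, never below
   PARM_BOUNDARY, with the psABI change warning above when the answer
   differs from the pre-4.6 compatibility computation.  */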
7078
7079 /* Return true if N is a possible register number of function value. */
7080
7081 static bool
7082 ix86_function_value_regno_p (const unsigned int regno)
7083 {
7084 switch (regno)
7085 {
7086 case AX_REG:
7087 return true;
7088
7089 case FIRST_FLOAT_REG:
7090 /* TODO: The function should depend on current function ABI but
7091 builtins.c would need updating then. Therefore we use the
7092 default ABI. */
7093 if (TARGET_64BIT && ix86_abi == MS_ABI)
7094 return false;
7095 return TARGET_FLOAT_RETURNS_IN_80387;
7096
7097 case FIRST_SSE_REG:
7098 return TARGET_SSE;
7099
7100 case FIRST_MMX_REG:
7101 if (TARGET_MACHO || TARGET_64BIT)
7102 return false;
7103 return TARGET_MMX;
7104 }
7105
7106 return false;
7107 }
7108
7109 /* Define how to find the value returned by a function.
7110 VALTYPE is the data type of the value (as a tree).
7111 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7112 otherwise, FUNC is 0. */
7113
7114 static rtx
7115 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7116 const_tree fntype, const_tree fn)
7117 {
7118 unsigned int regno;
7119
7120 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7121 we normally prevent this case when mmx is not available. However
7122 some ABIs may require the result to be returned like DImode. */
7123 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7124 regno = FIRST_MMX_REG;
7125
7126 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7127 we prevent this case when sse is not available. However some ABIs
7128 may require the result to be returned like integer TImode. */
7129 else if (mode == TImode
7130 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7131 regno = FIRST_SSE_REG;
7132
7133 /* 32-byte vector modes in %ymm0. */
7134 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7135 regno = FIRST_SSE_REG;
7136
7137 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7138 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7139 regno = FIRST_FLOAT_REG;
7140 else
7141 /* Most things go in %eax. */
7142 regno = AX_REG;
7143
7144 /* Override FP return register with %xmm0 for local functions when
7145 SSE math is enabled or for functions with sseregparm attribute. */
7146 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7147 {
7148 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7149 if ((sse_level >= 1 && mode == SFmode)
7150 || (sse_level == 2 && mode == DFmode))
7151 regno = FIRST_SSE_REG;
7152 }
7153
7154 /* OImode shouldn't be used directly. */
7155 gcc_assert (mode != OImode);
7156
7157 return gen_rtx_REG (orig_mode, regno);
7158 }
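
/* For illustration (hypothetical return types, default options, no SSE
   math or sseregparm override), the selection above places 32-bit return
   values as follows:

       int and pointer values       -> %eax
       float, double, long double   -> %st(0)  (x87 return enabled)
       8-byte vectors  (__m64)      -> %mm0
       16-byte vectors (__m128)     -> %xmm0
       32-byte vectors (__m256)     -> %ymm0                            */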
7159
7160 static rtx
7161 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7162 const_tree valtype)
7163 {
7164 rtx ret;
7165
7166 /* Handle libcalls, which don't provide a type node. */
7167 if (valtype == NULL)
7168 {
7169 unsigned int regno;
7170
7171 switch (mode)
7172 {
7173 case SFmode:
7174 case SCmode:
7175 case DFmode:
7176 case DCmode:
7177 case TFmode:
7178 case SDmode:
7179 case DDmode:
7180 case TDmode:
7181 regno = FIRST_SSE_REG;
7182 break;
7183 case XFmode:
7184 case XCmode:
7185 regno = FIRST_FLOAT_REG;
7186 break;
7187 case TCmode:
7188 return NULL;
7189 default:
7190 regno = AX_REG;
7191 }
7192
7193 return gen_rtx_REG (mode, regno);
7194 }
7195 else if (POINTER_TYPE_P (valtype))
7196 {
7197 /* Pointers are always returned in Pmode. */
7198 mode = Pmode;
7199 }
7200
7201 ret = construct_container (mode, orig_mode, valtype, 1,
7202 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7203 x86_64_int_return_registers, 0);
7204
7205 /* For zero-sized structures, construct_container returns NULL, but we
7206 need to keep the rest of the compiler happy by returning a meaningful value. */
7207 if (!ret)
7208 ret = gen_rtx_REG (orig_mode, AX_REG);
7209
7210 return ret;
7211 }
7212
7213 static rtx
7214 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7215 {
7216 unsigned int regno = AX_REG;
7217
7218 if (TARGET_SSE)
7219 {
7220 switch (GET_MODE_SIZE (mode))
7221 {
7222 case 16:
7223 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7224 && !COMPLEX_MODE_P (mode))
7225 regno = FIRST_SSE_REG;
7226 break;
7227 case 8:
7228 case 4:
7229 if (mode == SFmode || mode == DFmode)
7230 regno = FIRST_SSE_REG;
7231 break;
7232 default:
7233 break;
7234 }
7235 }
7236 return gen_rtx_REG (orig_mode, regno);
7237 }
7238
7239 static rtx
7240 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7241 enum machine_mode orig_mode, enum machine_mode mode)
7242 {
7243 const_tree fn, fntype;
7244
7245 fn = NULL_TREE;
7246 if (fntype_or_decl && DECL_P (fntype_or_decl))
7247 fn = fntype_or_decl;
7248 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7249
7250 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7251 return function_value_ms_64 (orig_mode, mode);
7252 else if (TARGET_64BIT)
7253 return function_value_64 (orig_mode, mode, valtype);
7254 else
7255 return function_value_32 (orig_mode, mode, fntype, fn);
7256 }
7257
7258 static rtx
7259 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7260 bool outgoing ATTRIBUTE_UNUSED)
7261 {
7262 enum machine_mode mode, orig_mode;
7263
7264 orig_mode = TYPE_MODE (valtype);
7265 mode = type_natural_mode (valtype, NULL);
7266 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7267 }
7268
7269 /* Pointer function arguments and return values are promoted to Pmode. */
7270
7271 static enum machine_mode
7272 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7273 int *punsignedp, const_tree fntype,
7274 int for_return)
7275 {
7276 if (type != NULL_TREE && POINTER_TYPE_P (type))
7277 {
7278 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7279 return Pmode;
7280 }
7281 return default_promote_function_mode (type, mode, punsignedp, fntype,
7282 for_return);
7283 }
7284
7285 rtx
7286 ix86_libcall_value (enum machine_mode mode)
7287 {
7288 return ix86_function_value_1 (NULL, NULL, mode, mode);
7289 }
7290
7291 /* Return true iff type is returned in memory. */
7292
7293 static bool ATTRIBUTE_UNUSED
7294 return_in_memory_32 (const_tree type, enum machine_mode mode)
7295 {
7296 HOST_WIDE_INT size;
7297
7298 if (mode == BLKmode)
7299 return true;
7300
7301 size = int_size_in_bytes (type);
7302
7303 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7304 return false;
7305
7306 if (VECTOR_MODE_P (mode) || mode == TImode)
7307 {
7308 /* User-created vectors small enough to fit in EAX. */
7309 if (size < 8)
7310 return false;
7311
7312 /* MMX/3dNow values are returned in MM0,
7313 except when it doesn't exist or the ABI prescribes otherwise. */
7314 if (size == 8)
7315 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7316
7317 /* SSE values are returned in XMM0, except when it doesn't exist. */
7318 if (size == 16)
7319 return !TARGET_SSE;
7320
7321 /* AVX values are returned in YMM0, except when it doesn't exist. */
7322 if (size == 32)
7323 return !TARGET_AVX;
7324 }
7325
7326 if (mode == XFmode)
7327 return false;
7328
7329 if (size > 12)
7330 return true;
7331
7332 /* OImode shouldn't be used directly. */
7333 gcc_assert (mode != OImode);
7334
7335 return false;
7336 }
7337
7338 static bool ATTRIBUTE_UNUSED
7339 return_in_memory_64 (const_tree type, enum machine_mode mode)
7340 {
7341 int needed_intregs, needed_sseregs;
7342 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7343 }
7344
7345 static bool ATTRIBUTE_UNUSED
7346 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7347 {
7348 HOST_WIDE_INT size = int_size_in_bytes (type);
7349
7350 /* __m128 is returned in xmm0. */
7351 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7352 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7353 return false;
7354
7355 /* Otherwise, the size must be exactly in [1248]. */
7356 return size != 1 && size != 2 && size != 4 && size != 8;
7357 }
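
/* For illustration, the MS_ABI rule above on a few hypothetical return
   types: objects of exactly 1, 2, 4 or 8 bytes and 16-byte non-complex
   integer/vector values (e.g. __m128) are returned in registers, while
   anything else -- say a 12- or 24-byte struct -- is returned through a
   hidden memory argument.  */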
7358
7359 static bool
7360 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7361 {
7362 #ifdef SUBTARGET_RETURN_IN_MEMORY
7363 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7364 #else
7365 const enum machine_mode mode = type_natural_mode (type, NULL);
7366
7367 if (TARGET_64BIT)
7368 {
7369 if (ix86_function_type_abi (fntype) == MS_ABI)
7370 return return_in_memory_ms_64 (type, mode);
7371 else
7372 return return_in_memory_64 (type, mode);
7373 }
7374 else
7375 return return_in_memory_32 (type, mode);
7376 #endif
7377 }
7378
7379 /* When returning SSE vector types, we have a choice of either
7380 (1) being abi incompatible with a -march switch, or
7381 (2) generating an error.
7382 Given no good solution, I think the safest thing is one warning.
7383 The user won't be able to use -Werror, but....
7384
7385 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7386 called in response to actually generating a caller or callee that
7387 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7388 via aggregate_value_p for general type probing from tree-ssa. */
7389
7390 static rtx
7391 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7392 {
7393 static bool warnedsse, warnedmmx;
7394
7395 if (!TARGET_64BIT && type)
7396 {
7397 /* Look at the return type of the function, not the function type. */
7398 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7399
7400 if (!TARGET_SSE && !warnedsse)
7401 {
7402 if (mode == TImode
7403 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7404 {
7405 warnedsse = true;
7406 warning (0, "SSE vector return without SSE enabled "
7407 "changes the ABI");
7408 }
7409 }
7410
7411 if (!TARGET_MMX && !warnedmmx)
7412 {
7413 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7414 {
7415 warnedmmx = true;
7416 warning (0, "MMX vector return without MMX enabled "
7417 "changes the ABI");
7418 }
7419 }
7420 }
7421
7422 return NULL;
7423 }
7424
7425 \f
7426 /* Create the va_list data type. */
7427
7428 /* Returns the calling-convention-specific va_list data type.
7429 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7430
7431 static tree
7432 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7433 {
7434 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7435
7436 /* For i386 we use plain pointer to argument area. */
7437 if (!TARGET_64BIT || abi == MS_ABI)
7438 return build_pointer_type (char_type_node);
7439
7440 record = lang_hooks.types.make_type (RECORD_TYPE);
7441 type_decl = build_decl (BUILTINS_LOCATION,
7442 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7443
7444 f_gpr = build_decl (BUILTINS_LOCATION,
7445 FIELD_DECL, get_identifier ("gp_offset"),
7446 unsigned_type_node);
7447 f_fpr = build_decl (BUILTINS_LOCATION,
7448 FIELD_DECL, get_identifier ("fp_offset"),
7449 unsigned_type_node);
7450 f_ovf = build_decl (BUILTINS_LOCATION,
7451 FIELD_DECL, get_identifier ("overflow_arg_area"),
7452 ptr_type_node);
7453 f_sav = build_decl (BUILTINS_LOCATION,
7454 FIELD_DECL, get_identifier ("reg_save_area"),
7455 ptr_type_node);
7456
7457 va_list_gpr_counter_field = f_gpr;
7458 va_list_fpr_counter_field = f_fpr;
7459
7460 DECL_FIELD_CONTEXT (f_gpr) = record;
7461 DECL_FIELD_CONTEXT (f_fpr) = record;
7462 DECL_FIELD_CONTEXT (f_ovf) = record;
7463 DECL_FIELD_CONTEXT (f_sav) = record;
7464
7465 TYPE_STUB_DECL (record) = type_decl;
7466 TYPE_NAME (record) = type_decl;
7467 TYPE_FIELDS (record) = f_gpr;
7468 DECL_CHAIN (f_gpr) = f_fpr;
7469 DECL_CHAIN (f_fpr) = f_ovf;
7470 DECL_CHAIN (f_ovf) = f_sav;
7471
7472 layout_type (record);
7473
7474 /* The correct type is an array type of one element. */
7475 return build_array_type (record, build_index_type (size_zero_node));
7476 }
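
/* For reference, the record built above corresponds to the familiar SysV
   x86-64 va_list declaration (sketch of the layout only):

       typedef struct __va_list_tag
       {
         unsigned int gp_offset;     // bytes into reg_save_area for GPR args
         unsigned int fp_offset;     // bytes into reg_save_area for XMM args
         void *overflow_arg_area;    // next stack-passed argument
         void *reg_save_area;        // start of the register save area
       } __va_list_tag;

       typedef __va_list_tag va_list[1];   // array type of one element  */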
7477
7478 /* Set up the builtin va_list data type and, for 64-bit, the additional
7479 calling-convention-specific va_list data types. */
7480
7481 static tree
7482 ix86_build_builtin_va_list (void)
7483 {
7484 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7485
7486 /* Initialize abi specific va_list builtin types. */
7487 if (TARGET_64BIT)
7488 {
7489 tree t;
7490 if (ix86_abi == MS_ABI)
7491 {
7492 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7493 if (TREE_CODE (t) != RECORD_TYPE)
7494 t = build_variant_type_copy (t);
7495 sysv_va_list_type_node = t;
7496 }
7497 else
7498 {
7499 t = ret;
7500 if (TREE_CODE (t) != RECORD_TYPE)
7501 t = build_variant_type_copy (t);
7502 sysv_va_list_type_node = t;
7503 }
7504 if (ix86_abi != MS_ABI)
7505 {
7506 t = ix86_build_builtin_va_list_abi (MS_ABI);
7507 if (TREE_CODE (t) != RECORD_TYPE)
7508 t = build_variant_type_copy (t);
7509 ms_va_list_type_node = t;
7510 }
7511 else
7512 {
7513 t = ret;
7514 if (TREE_CODE (t) != RECORD_TYPE)
7515 t = build_variant_type_copy (t);
7516 ms_va_list_type_node = t;
7517 }
7518 }
7519
7520 return ret;
7521 }
7522
7523 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7524
7525 static void
7526 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7527 {
7528 rtx save_area, mem;
7529 alias_set_type set;
7530 int i, max;
7531
7532 /* GPR size of varargs save area. */
7533 if (cfun->va_list_gpr_size)
7534 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7535 else
7536 ix86_varargs_gpr_size = 0;
7537
7538 /* FPR size of varargs save area. We don't need it if we don't pass
7539 anything in SSE registers. */
7540 if (TARGET_SSE && cfun->va_list_fpr_size)
7541 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7542 else
7543 ix86_varargs_fpr_size = 0;
7544
7545 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7546 return;
7547
7548 save_area = frame_pointer_rtx;
7549 set = get_varargs_alias_set ();
7550
7551 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7552 if (max > X86_64_REGPARM_MAX)
7553 max = X86_64_REGPARM_MAX;
7554
7555 for (i = cum->regno; i < max; i++)
7556 {
7557 mem = gen_rtx_MEM (Pmode,
7558 plus_constant (save_area, i * UNITS_PER_WORD));
7559 MEM_NOTRAP_P (mem) = 1;
7560 set_mem_alias_set (mem, set);
7561 emit_move_insn (mem, gen_rtx_REG (Pmode,
7562 x86_64_int_parameter_registers[i]));
7563 }
7564
7565 if (ix86_varargs_fpr_size)
7566 {
7567 enum machine_mode smode;
7568 rtx label, test;
7569
7570 /* Now emit code to save SSE registers. The AX parameter contains number
7571 of SSE parameter registers used to call this function, though all we
7572 actually check here is the zero/non-zero status. */
7573
7574 label = gen_label_rtx ();
7575 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7576 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7577 label));
7578
7579 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7580 we used movdqa (i.e. TImode) instead? Perhaps even better would
7581 be if we could determine the real mode of the data, via a hook
7582 into pass_stdarg. Ignore all that for now. */
7583 smode = V4SFmode;
7584 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7585 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7586
7587 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7588 if (max > X86_64_SSE_REGPARM_MAX)
7589 max = X86_64_SSE_REGPARM_MAX;
7590
7591 for (i = cum->sse_regno; i < max; ++i)
7592 {
7593 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7594 mem = gen_rtx_MEM (smode, mem);
7595 MEM_NOTRAP_P (mem) = 1;
7596 set_mem_alias_set (mem, set);
7597 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7598
7599 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7600 }
7601
7602 emit_label (label);
7603 }
7604 }
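
/* For illustration, the varargs register save area laid out above (in the
   full X86_64_REGPARM_MAX = 6, X86_64_SSE_REGPARM_MAX = 8 case):

       offset   0 ..  47 : %rdi, %rsi, %rdx, %rcx, %r8, %r9   (8 bytes each)
       offset  48 .. 175 : %xmm0 .. %xmm7                      (16 bytes each)

   The XMM half is stored only when %al is non-zero, i.e. when the caller
   announced that SSE registers carry arguments.  */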
7605
7606 static void
7607 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7608 {
7609 alias_set_type set = get_varargs_alias_set ();
7610 int i;
7611
7612 /* Reset to zero, as there might be a SysV va_arg used
7613 before. */
7614 ix86_varargs_gpr_size = 0;
7615 ix86_varargs_fpr_size = 0;
7616
7617 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7618 {
7619 rtx reg, mem;
7620
7621 mem = gen_rtx_MEM (Pmode,
7622 plus_constant (virtual_incoming_args_rtx,
7623 i * UNITS_PER_WORD));
7624 MEM_NOTRAP_P (mem) = 1;
7625 set_mem_alias_set (mem, set);
7626
7627 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7628 emit_move_insn (mem, reg);
7629 }
7630 }
7631
7632 static void
7633 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7634 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7635 int no_rtl)
7636 {
7637 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7638 CUMULATIVE_ARGS next_cum;
7639 tree fntype;
7640
7641 /* This argument doesn't appear to be used anymore. Which is good,
7642 because the old code here didn't suppress rtl generation. */
7643 gcc_assert (!no_rtl);
7644
7645 if (!TARGET_64BIT)
7646 return;
7647
7648 fntype = TREE_TYPE (current_function_decl);
7649
7650 /* For varargs, we do not want to skip the dummy va_dcl argument.
7651 For stdargs, we do want to skip the last named argument. */
7652 next_cum = *cum;
7653 if (stdarg_p (fntype))
7654 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7655 true);
7656
7657 if (cum->call_abi == MS_ABI)
7658 setup_incoming_varargs_ms_64 (&next_cum);
7659 else
7660 setup_incoming_varargs_64 (&next_cum);
7661 }
7662
7663 /* Checks if TYPE is of kind va_list char *. */
7664
7665 static bool
7666 is_va_list_char_pointer (tree type)
7667 {
7668 tree canonic;
7669
7670 /* For 32-bit it is always true. */
7671 if (!TARGET_64BIT)
7672 return true;
7673 canonic = ix86_canonical_va_list_type (type);
7674 return (canonic == ms_va_list_type_node
7675 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7676 }
7677
7678 /* Implement va_start. */
7679
7680 static void
7681 ix86_va_start (tree valist, rtx nextarg)
7682 {
7683 HOST_WIDE_INT words, n_gpr, n_fpr;
7684 tree f_gpr, f_fpr, f_ovf, f_sav;
7685 tree gpr, fpr, ovf, sav, t;
7686 tree type;
7687 rtx ovf_rtx;
7688
7689 if (flag_split_stack
7690 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7691 {
7692 unsigned int scratch_regno;
7693
7694 /* When we are splitting the stack, we can't refer to the stack
7695 arguments using internal_arg_pointer, because they may be on
7696 the old stack. The split stack prologue will arrange to
7697 leave a pointer to the old stack arguments in a scratch
7698 register, which we here copy to a pseudo-register. The split
7699 stack prologue can't set the pseudo-register directly because
7700 it (the prologue) runs before any registers have been saved. */
7701
7702 scratch_regno = split_stack_prologue_scratch_regno ();
7703 if (scratch_regno != INVALID_REGNUM)
7704 {
7705 rtx reg, seq;
7706
7707 reg = gen_reg_rtx (Pmode);
7708 cfun->machine->split_stack_varargs_pointer = reg;
7709
7710 start_sequence ();
7711 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7712 seq = get_insns ();
7713 end_sequence ();
7714
7715 push_topmost_sequence ();
7716 emit_insn_after (seq, entry_of_function ());
7717 pop_topmost_sequence ();
7718 }
7719 }
7720
7721 /* Only 64bit target needs something special. */
7722 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7723 {
7724 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7725 std_expand_builtin_va_start (valist, nextarg);
7726 else
7727 {
7728 rtx va_r, next;
7729
7730 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7731 next = expand_binop (ptr_mode, add_optab,
7732 cfun->machine->split_stack_varargs_pointer,
7733 crtl->args.arg_offset_rtx,
7734 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7735 convert_move (va_r, next, 0);
7736 }
7737 return;
7738 }
7739
7740 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7741 f_fpr = DECL_CHAIN (f_gpr);
7742 f_ovf = DECL_CHAIN (f_fpr);
7743 f_sav = DECL_CHAIN (f_ovf);
7744
7745 valist = build_simple_mem_ref (valist);
7746 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7747 /* The following should be folded into the MEM_REF offset. */
7748 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7749 f_gpr, NULL_TREE);
7750 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7751 f_fpr, NULL_TREE);
7752 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7753 f_ovf, NULL_TREE);
7754 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7755 f_sav, NULL_TREE);
7756
7757 /* Count number of gp and fp argument registers used. */
7758 words = crtl->args.info.words;
7759 n_gpr = crtl->args.info.regno;
7760 n_fpr = crtl->args.info.sse_regno;
7761
7762 if (cfun->va_list_gpr_size)
7763 {
7764 type = TREE_TYPE (gpr);
7765 t = build2 (MODIFY_EXPR, type,
7766 gpr, build_int_cst (type, n_gpr * 8));
7767 TREE_SIDE_EFFECTS (t) = 1;
7768 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7769 }
7770
7771 if (TARGET_SSE && cfun->va_list_fpr_size)
7772 {
7773 type = TREE_TYPE (fpr);
7774 t = build2 (MODIFY_EXPR, type, fpr,
7775 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7776 TREE_SIDE_EFFECTS (t) = 1;
7777 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7778 }
7779
7780 /* Find the overflow area. */
7781 type = TREE_TYPE (ovf);
7782 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7783 ovf_rtx = crtl->args.internal_arg_pointer;
7784 else
7785 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7786 t = make_tree (type, ovf_rtx);
7787 if (words != 0)
7788 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7789 t = build2 (MODIFY_EXPR, type, ovf, t);
7790 TREE_SIDE_EFFECTS (t) = 1;
7791 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7792
7793 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7794 {
7795 /* Find the register save area.
7796 The function prologue saves it right above the stack frame. */
7797 type = TREE_TYPE (sav);
7798 t = make_tree (type, frame_pointer_rtx);
7799 if (!ix86_varargs_gpr_size)
7800 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7801 t = build2 (MODIFY_EXPR, type, sav, t);
7802 TREE_SIDE_EFFECTS (t) = 1;
7803 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7804 }
7805 }
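
/* For illustration (hypothetical example): for

       void g (int a, double b, ...);

   one GPR and one SSE register are consumed by the named arguments, so the
   expansion above initializes the va_list roughly as

       gp_offset         = 1 * 8        =  8
       fp_offset         = 48 + 1 * 16  = 64
       overflow_arg_area = incoming argument pointer (plus any words already
                           used by named stack arguments)
       reg_save_area     = register save area set up in the prologue.  */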
7806
7807 /* Implement va_arg. */
7808
7809 static tree
7810 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7811 gimple_seq *post_p)
7812 {
7813 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7814 tree f_gpr, f_fpr, f_ovf, f_sav;
7815 tree gpr, fpr, ovf, sav, t;
7816 int size, rsize;
7817 tree lab_false, lab_over = NULL_TREE;
7818 tree addr, t2;
7819 rtx container;
7820 int indirect_p = 0;
7821 tree ptrtype;
7822 enum machine_mode nat_mode;
7823 unsigned int arg_boundary;
7824
7825 /* Only 64bit target needs something special. */
7826 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7827 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7828
7829 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7830 f_fpr = DECL_CHAIN (f_gpr);
7831 f_ovf = DECL_CHAIN (f_fpr);
7832 f_sav = DECL_CHAIN (f_ovf);
7833
7834 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7835 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7836 valist = build_va_arg_indirect_ref (valist);
7837 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7838 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7839 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7840
7841 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7842 if (indirect_p)
7843 type = build_pointer_type (type);
7844 size = int_size_in_bytes (type);
7845 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7846
7847 nat_mode = type_natural_mode (type, NULL);
7848 switch (nat_mode)
7849 {
7850 case V8SFmode:
7851 case V8SImode:
7852 case V32QImode:
7853 case V16HImode:
7854 case V4DFmode:
7855 case V4DImode:
7856 /* Unnamed 256bit vector mode parameters are passed on stack. */
7857 if (!TARGET_64BIT_MS_ABI)
7858 {
7859 container = NULL;
7860 break;
7861 }
7862
7863 default:
7864 container = construct_container (nat_mode, TYPE_MODE (type),
7865 type, 0, X86_64_REGPARM_MAX,
7866 X86_64_SSE_REGPARM_MAX, intreg,
7867 0);
7868 break;
7869 }
7870
7871 /* Pull the value out of the saved registers. */
7872
7873 addr = create_tmp_var (ptr_type_node, "addr");
7874
7875 if (container)
7876 {
7877 int needed_intregs, needed_sseregs;
7878 bool need_temp;
7879 tree int_addr, sse_addr;
7880
7881 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7882 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7883
7884 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7885
7886 need_temp = (!REG_P (container)
7887 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7888 || TYPE_ALIGN (type) > 128));
7889
7890 /* In case we are passing a structure, verify that it is a consecutive block
7891 in the register save area.  If not, we need to do moves. */
7892 if (!need_temp && !REG_P (container))
7893 {
7894 /* Verify that all registers are strictly consecutive */
7895 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7896 {
7897 int i;
7898
7899 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7900 {
7901 rtx slot = XVECEXP (container, 0, i);
7902 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7903 || INTVAL (XEXP (slot, 1)) != i * 16)
7904 need_temp = 1;
7905 }
7906 }
7907 else
7908 {
7909 int i;
7910
7911 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7912 {
7913 rtx slot = XVECEXP (container, 0, i);
7914 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7915 || INTVAL (XEXP (slot, 1)) != i * 8)
7916 need_temp = 1;
7917 }
7918 }
7919 }
7920 if (!need_temp)
7921 {
7922 int_addr = addr;
7923 sse_addr = addr;
7924 }
7925 else
7926 {
7927 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7928 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7929 }
7930
7931 /* First ensure that we fit completely in registers. */
7932 if (needed_intregs)
7933 {
7934 t = build_int_cst (TREE_TYPE (gpr),
7935 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7936 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7937 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7938 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7939 gimplify_and_add (t, pre_p);
7940 }
7941 if (needed_sseregs)
7942 {
7943 t = build_int_cst (TREE_TYPE (fpr),
7944 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7945 + X86_64_REGPARM_MAX * 8);
7946 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7947 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7948 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7949 gimplify_and_add (t, pre_p);
7950 }
7951
7952 /* Compute index to start of area used for integer regs. */
7953 if (needed_intregs)
7954 {
7955 /* int_addr = gpr + sav; */
7956 t = fold_build_pointer_plus (sav, gpr);
7957 gimplify_assign (int_addr, t, pre_p);
7958 }
7959 if (needed_sseregs)
7960 {
7961 /* sse_addr = fpr + sav; */
7962 t = fold_build_pointer_plus (sav, fpr);
7963 gimplify_assign (sse_addr, t, pre_p);
7964 }
7965 if (need_temp)
7966 {
7967 int i, prev_size = 0;
7968 tree temp = create_tmp_var (type, "va_arg_tmp");
7969
7970 /* addr = &temp; */
7971 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7972 gimplify_assign (addr, t, pre_p);
7973
7974 for (i = 0; i < XVECLEN (container, 0); i++)
7975 {
7976 rtx slot = XVECEXP (container, 0, i);
7977 rtx reg = XEXP (slot, 0);
7978 enum machine_mode mode = GET_MODE (reg);
7979 tree piece_type;
7980 tree addr_type;
7981 tree daddr_type;
7982 tree src_addr, src;
7983 int src_offset;
7984 tree dest_addr, dest;
7985 int cur_size = GET_MODE_SIZE (mode);
7986
7987 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7988 prev_size = INTVAL (XEXP (slot, 1));
7989 if (prev_size + cur_size > size)
7990 {
7991 cur_size = size - prev_size;
7992 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7993 if (mode == BLKmode)
7994 mode = QImode;
7995 }
7996 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7997 if (mode == GET_MODE (reg))
7998 addr_type = build_pointer_type (piece_type);
7999 else
8000 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8001 true);
8002 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8003 true);
8004
8005 if (SSE_REGNO_P (REGNO (reg)))
8006 {
8007 src_addr = sse_addr;
8008 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8009 }
8010 else
8011 {
8012 src_addr = int_addr;
8013 src_offset = REGNO (reg) * 8;
8014 }
8015 src_addr = fold_convert (addr_type, src_addr);
8016 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8017
8018 dest_addr = fold_convert (daddr_type, addr);
8019 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8020 if (cur_size == GET_MODE_SIZE (mode))
8021 {
8022 src = build_va_arg_indirect_ref (src_addr);
8023 dest = build_va_arg_indirect_ref (dest_addr);
8024
8025 gimplify_assign (dest, src, pre_p);
8026 }
8027 else
8028 {
8029 tree copy
8030 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8031 3, dest_addr, src_addr,
8032 size_int (cur_size));
8033 gimplify_and_add (copy, pre_p);
8034 }
8035 prev_size += cur_size;
8036 }
8037 }
8038
8039 if (needed_intregs)
8040 {
8041 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8042 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8043 gimplify_assign (gpr, t, pre_p);
8044 }
8045
8046 if (needed_sseregs)
8047 {
8048 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8049 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8050 gimplify_assign (fpr, t, pre_p);
8051 }
8052
8053 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8054
8055 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8056 }
8057
8058 /* ... otherwise out of the overflow area. */
8059
8060 /* When we align a parameter on the stack for the caller, if the parameter
8061 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8062 aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
8063 here with the caller. */
8064 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8065 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8066 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8067
8068 /* Care for on-stack alignment if needed. */
8069 if (arg_boundary <= 64 || size == 0)
8070 t = ovf;
8071 else
8072 {
8073 HOST_WIDE_INT align = arg_boundary / 8;
8074 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8075 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8076 build_int_cst (TREE_TYPE (t), -align));
8077 }
8078
8079 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8080 gimplify_assign (addr, t, pre_p);
8081
8082 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8083 gimplify_assign (unshare_expr (ovf), t, pre_p);
8084
8085 if (container)
8086 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8087
8088 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8089 addr = fold_convert (ptrtype, addr);
8090
8091 if (indirect_p)
8092 addr = build_va_arg_indirect_ref (addr);
8093 return build_va_arg_indirect_ref (addr);
8094 }
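
/* For illustration, the GIMPLE emitted above for a plain integer va_arg
   boils down to something like this (hypothetical C rendering):

       if (ap->gp_offset >= 6 * 8)        // no integer register left
         goto overflow;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = align (ap->overflow_arg_area, boundary);
       ap->overflow_arg_area = addr + rounded_size;
     done:
       return *(T *) addr;                                               */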
8095 \f
8096 /* Return true if OPNUM's MEM should be matched
8097 in movabs* patterns. */
8098
8099 bool
8100 ix86_check_movabs (rtx insn, int opnum)
8101 {
8102 rtx set, mem;
8103
8104 set = PATTERN (insn);
8105 if (GET_CODE (set) == PARALLEL)
8106 set = XVECEXP (set, 0, 0);
8107 gcc_assert (GET_CODE (set) == SET);
8108 mem = XEXP (set, opnum);
8109 while (GET_CODE (mem) == SUBREG)
8110 mem = SUBREG_REG (mem);
8111 gcc_assert (MEM_P (mem));
8112 return volatile_ok || !MEM_VOLATILE_P (mem);
8113 }
8114 \f
8115 /* Initialize the table of extra 80387 mathematical constants. */
8116
8117 static void
8118 init_ext_80387_constants (void)
8119 {
8120 static const char * cst[5] =
8121 {
8122 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8123 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8124 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8125 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8126 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8127 };
8128 int i;
8129
8130 for (i = 0; i < 5; i++)
8131 {
8132 real_from_string (&ext_80387_constants_table[i], cst[i]);
8133 /* Ensure each constant is rounded to XFmode precision. */
8134 real_convert (&ext_80387_constants_table[i],
8135 XFmode, &ext_80387_constants_table[i]);
8136 }
8137
8138 ext_80387_constants_init = 1;
8139 }
8140
8141 /* Return non-zero if the constant is something that
8142 can be loaded with a special instruction. */
8143
8144 int
8145 standard_80387_constant_p (rtx x)
8146 {
8147 enum machine_mode mode = GET_MODE (x);
8148
8149 REAL_VALUE_TYPE r;
8150
8151 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8152 return -1;
8153
8154 if (x == CONST0_RTX (mode))
8155 return 1;
8156 if (x == CONST1_RTX (mode))
8157 return 2;
8158
8159 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8160
8161 /* For XFmode constants, try to find a special 80387 instruction when
8162 optimizing for size or on those CPUs that benefit from them. */
8163 if (mode == XFmode
8164 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8165 {
8166 int i;
8167
8168 if (! ext_80387_constants_init)
8169 init_ext_80387_constants ();
8170
8171 for (i = 0; i < 5; i++)
8172 if (real_identical (&r, &ext_80387_constants_table[i]))
8173 return i + 3;
8174 }
8175
8176 /* A load of the constant -0.0 or -1.0 will be split into an
8177 fldz;fchs or fld1;fchs sequence. */
8178 if (real_isnegzero (&r))
8179 return 8;
8180 if (real_identical (&r, &dconstm1))
8181 return 9;
8182
8183 return 0;
8184 }
8185
8186 /* Return the opcode of the special instruction to be used to load
8187 the constant X. */
8188
8189 const char *
8190 standard_80387_constant_opcode (rtx x)
8191 {
8192 switch (standard_80387_constant_p (x))
8193 {
8194 case 1:
8195 return "fldz";
8196 case 2:
8197 return "fld1";
8198 case 3:
8199 return "fldlg2";
8200 case 4:
8201 return "fldln2";
8202 case 5:
8203 return "fldl2e";
8204 case 6:
8205 return "fldl2t";
8206 case 7:
8207 return "fldpi";
8208 case 8:
8209 case 9:
8210 return "#";
8211 default:
8212 gcc_unreachable ();
8213 }
8214 }
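
/* For illustration, the mapping between standard_80387_constant_p return
   values and the opcodes emitted above:

       1 ->  0.0        fldz           6 -> log2(10)   fldl2t
       2 ->  1.0        fld1           7 -> pi         fldpi
       3 -> log10(2)    fldlg2         8 -> -0.0       "#" (split: fldz; fchs)
       4 -> ln(2)       fldln2         9 -> -1.0       "#" (split: fld1; fchs)
       5 -> log2(e)     fldl2e                                            */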
8215
8216 /* Return the CONST_DOUBLE representing the 80387 constant that is
8217 loaded by the specified special instruction. The argument IDX
8218 matches the return value from standard_80387_constant_p. */
8219
8220 rtx
8221 standard_80387_constant_rtx (int idx)
8222 {
8223 int i;
8224
8225 if (! ext_80387_constants_init)
8226 init_ext_80387_constants ();
8227
8228 switch (idx)
8229 {
8230 case 3:
8231 case 4:
8232 case 5:
8233 case 6:
8234 case 7:
8235 i = idx - 3;
8236 break;
8237
8238 default:
8239 gcc_unreachable ();
8240 }
8241
8242 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8243 XFmode);
8244 }
8245
8246 /* Return 1 if X is all 0s and 2 if X is all 1s
8247 in supported SSE/AVX vector mode. */
8248
8249 int
8250 standard_sse_constant_p (rtx x)
8251 {
8252 enum machine_mode mode = GET_MODE (x);
8253
8254 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8255 return 1;
8256 if (vector_all_ones_operand (x, mode))
8257 switch (mode)
8258 {
8259 case V16QImode:
8260 case V8HImode:
8261 case V4SImode:
8262 case V2DImode:
8263 if (TARGET_SSE2)
8264 return 2;
8265 case V32QImode:
8266 case V16HImode:
8267 case V8SImode:
8268 case V4DImode:
8269 if (TARGET_AVX2)
8270 return 2;
8271 default:
8272 break;
8273 }
8274
8275 return 0;
8276 }
8277
8278 /* Return the opcode of the special instruction to be used to load
8279 the constant X. */
8280
8281 const char *
8282 standard_sse_constant_opcode (rtx insn, rtx x)
8283 {
8284 switch (standard_sse_constant_p (x))
8285 {
8286 case 1:
8287 switch (get_attr_mode (insn))
8288 {
8289 case MODE_TI:
8290 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8291 return "%vpxor\t%0, %d0";
8292 case MODE_V2DF:
8293 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8294 return "%vxorpd\t%0, %d0";
8295 case MODE_V4SF:
8296 return "%vxorps\t%0, %d0";
8297
8298 case MODE_OI:
8299 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8300 return "vpxor\t%x0, %x0, %x0";
8301 case MODE_V4DF:
8302 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8303 return "vxorpd\t%x0, %x0, %x0";
8304 case MODE_V8SF:
8305 return "vxorps\t%x0, %x0, %x0";
8306
8307 default:
8308 break;
8309 }
8310
8311 case 2:
8312 if (TARGET_AVX)
8313 return "vpcmpeqd\t%0, %0, %0";
8314 else
8315 return "pcmpeqd\t%0, %0";
8316
8317 default:
8318 break;
8319 }
8320 gcc_unreachable ();
8321 }
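
/* For illustration, the instructions selected above for the two "standard"
   SSE constants (non-AVX encodings shown):

       pxor    %xmm0, %xmm0     # all-zeros vector, integer modes
       xorps   %xmm0, %xmm0     # all-zeros vector, single-float modes
       pcmpeqd %xmm0, %xmm0     # all-ones vector (SSE2/AVX2, as checked
                                # in standard_sse_constant_p)             */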
8322
8323 /* Returns true if OP contains a symbol reference */
8324
8325 bool
8326 symbolic_reference_mentioned_p (rtx op)
8327 {
8328 const char *fmt;
8329 int i;
8330
8331 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8332 return true;
8333
8334 fmt = GET_RTX_FORMAT (GET_CODE (op));
8335 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8336 {
8337 if (fmt[i] == 'E')
8338 {
8339 int j;
8340
8341 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8342 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8343 return true;
8344 }
8345
8346 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8347 return true;
8348 }
8349
8350 return false;
8351 }
8352
8353 /* Return true if it is appropriate to emit `ret' instructions in the
8354 body of a function. Do this only if the epilogue is simple, needing a
8355 couple of insns. Prior to reloading, we can't tell how many registers
8356 must be saved, so return false then. Return false if there is no frame
8357 marker to de-allocate. */
8358
8359 bool
8360 ix86_can_use_return_insn_p (void)
8361 {
8362 struct ix86_frame frame;
8363
8364 if (! reload_completed || frame_pointer_needed)
8365 return 0;
8366
8367 /* Don't allow more than 32k pop, since that's all we can do
8368 with one instruction. */
8369 if (crtl->args.pops_args && crtl->args.size >= 32768)
8370 return 0;
8371
8372 ix86_compute_frame_layout (&frame);
8373 return (frame.stack_pointer_offset == UNITS_PER_WORD
8374 && (frame.nregs + frame.nsseregs) == 0);
8375 }
8376 \f
8377 /* Value should be nonzero if functions must have frame pointers.
8378 Zero means the frame pointer need not be set up (and parms may
8379 be accessed via the stack pointer) in functions that seem suitable. */
8380
8381 static bool
8382 ix86_frame_pointer_required (void)
8383 {
8384 /* If we accessed previous frames, then the generated code expects
8385 to be able to access the saved ebp value in our frame. */
8386 if (cfun->machine->accesses_prev_frame)
8387 return true;
8388
8389 /* Several x86 os'es need a frame pointer for other reasons,
8390 usually pertaining to setjmp. */
8391 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8392 return true;
8393
8394 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8395 turns off the frame pointer by default. Turn it back on now if
8396 we've not got a leaf function. */
8397 if (TARGET_OMIT_LEAF_FRAME_POINTER
8398 && (!current_function_is_leaf
8399 || ix86_current_function_calls_tls_descriptor))
8400 return true;
8401
8402 if (crtl->profile && !flag_fentry)
8403 return true;
8404
8405 return false;
8406 }
8407
8408 /* Record that the current function accesses previous call frames. */
8409
8410 void
8411 ix86_setup_frame_addresses (void)
8412 {
8413 cfun->machine->accesses_prev_frame = 1;
8414 }
8415 \f
8416 #ifndef USE_HIDDEN_LINKONCE
8417 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8418 # define USE_HIDDEN_LINKONCE 1
8419 # else
8420 # define USE_HIDDEN_LINKONCE 0
8421 # endif
8422 #endif
8423
8424 static int pic_labels_used;
8425
8426 /* Fills in the label name that should be used for a pc thunk for
8427 the given register. */
8428
8429 static void
8430 get_pc_thunk_name (char name[32], unsigned int regno)
8431 {
8432 gcc_assert (!TARGET_64BIT);
8433
8434 if (USE_HIDDEN_LINKONCE)
8435 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8436 else
8437 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8438 }
8439
8440
8441 /* This function generates the pc thunks used by -fpic code; each thunk
8442 loads its register with the return address of the caller and then returns. */
8443
8444 static void
8445 ix86_code_end (void)
8446 {
8447 rtx xops[2];
8448 int regno;
8449
8450 for (regno = AX_REG; regno <= SP_REG; regno++)
8451 {
8452 char name[32];
8453 tree decl;
8454
8455 if (!(pic_labels_used & (1 << regno)))
8456 continue;
8457
8458 get_pc_thunk_name (name, regno);
8459
8460 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8461 get_identifier (name),
8462 build_function_type_list (void_type_node, NULL_TREE));
8463 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8464 NULL_TREE, void_type_node);
8465 TREE_PUBLIC (decl) = 1;
8466 TREE_STATIC (decl) = 1;
8467
8468 #if TARGET_MACHO
8469 if (TARGET_MACHO)
8470 {
8471 switch_to_section (darwin_sections[text_coal_section]);
8472 fputs ("\t.weak_definition\t", asm_out_file);
8473 assemble_name (asm_out_file, name);
8474 fputs ("\n\t.private_extern\t", asm_out_file);
8475 assemble_name (asm_out_file, name);
8476 putc ('\n', asm_out_file);
8477 ASM_OUTPUT_LABEL (asm_out_file, name);
8478 DECL_WEAK (decl) = 1;
8479 }
8480 else
8481 #endif
8482 if (USE_HIDDEN_LINKONCE)
8483 {
8484 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8485
8486 targetm.asm_out.unique_section (decl, 0);
8487 switch_to_section (get_named_section (decl, NULL, 0));
8488
8489 targetm.asm_out.globalize_label (asm_out_file, name);
8490 fputs ("\t.hidden\t", asm_out_file);
8491 assemble_name (asm_out_file, name);
8492 putc ('\n', asm_out_file);
8493 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8494 }
8495 else
8496 {
8497 switch_to_section (text_section);
8498 ASM_OUTPUT_LABEL (asm_out_file, name);
8499 }
8500
8501 DECL_INITIAL (decl) = make_node (BLOCK);
8502 current_function_decl = decl;
8503 init_function_start (decl);
8504 first_function_block_is_cold = false;
8505 /* Make sure unwind info is emitted for the thunk if needed. */
8506 final_start_function (emit_barrier (), asm_out_file, 1);
8507
8508 /* Pad stack IP move with 4 instructions (two NOPs count
8509 as one instruction). */
8510 if (TARGET_PAD_SHORT_FUNCTION)
8511 {
8512 int i = 8;
8513
8514 while (i--)
8515 fputs ("\tnop\n", asm_out_file);
8516 }
8517
8518 xops[0] = gen_rtx_REG (Pmode, regno);
8519 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8520 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8521 fputs ("\tret\n", asm_out_file);
8522 final_end_function ();
8523 init_insn_lengths ();
8524 free_after_compilation (cfun);
8525 set_cfun (NULL);
8526 current_function_decl = NULL;
8527 }
8528
8529 if (flag_split_stack)
8530 file_end_indicate_split_stack ();
8531 }
8532
8533 /* Emit code for the SET_GOT patterns. */
8534
8535 const char *
8536 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8537 {
8538 rtx xops[3];
8539
8540 xops[0] = dest;
8541
8542 if (TARGET_VXWORKS_RTP && flag_pic)
8543 {
8544 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8545 xops[2] = gen_rtx_MEM (Pmode,
8546 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8547 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8548
8549 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8550 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8551 an unadorned address. */
8552 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8553 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8554 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8555 return "";
8556 }
8557
8558 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8559
8560 if (!flag_pic)
8561 {
8562 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8563
8564 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8565
8566 #if TARGET_MACHO
8567 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8568 is what will be referenced by the Mach-O PIC subsystem. */
8569 if (!label)
8570 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8571 #endif
8572
8573 targetm.asm_out.internal_label (asm_out_file, "L",
8574 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8575 }
8576 else
8577 {
8578 char name[32];
8579 get_pc_thunk_name (name, REGNO (dest));
8580 pic_labels_used |= 1 << REGNO (dest);
8581
8582 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8583 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8584 output_asm_insn ("call\t%X2", xops);
8585 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8586 is what will be referenced by the Mach-O PIC subsystem. */
8587 #if TARGET_MACHO
8588 if (!label)
8589 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8590 else
8591 targetm.asm_out.internal_label (asm_out_file, "L",
8592 CODE_LABEL_NUMBER (label));
8593 #endif
8594 }
8595
8596 if (!TARGET_MACHO)
8597 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8598
8599 return "";
8600 }
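
/* For illustration, the typical -fpic sequence produced by the code above
   together with the thunk emitted by ix86_code_end, shown here for %ebx:

       call  __x86.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk itself is just

       __x86.get_pc_thunk.bx:
       movl  (%esp), %ebx
       ret                                                               */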
8601
8602 /* Generate a "push" pattern for input ARG. */
8603
8604 static rtx
8605 gen_push (rtx arg)
8606 {
8607 struct machine_function *m = cfun->machine;
8608
8609 if (m->fs.cfa_reg == stack_pointer_rtx)
8610 m->fs.cfa_offset += UNITS_PER_WORD;
8611 m->fs.sp_offset += UNITS_PER_WORD;
8612
8613 return gen_rtx_SET (VOIDmode,
8614 gen_rtx_MEM (Pmode,
8615 gen_rtx_PRE_DEC (Pmode,
8616 stack_pointer_rtx)),
8617 arg);
8618 }
8619
8620 /* Generate a "pop" pattern for input ARG. */
8621
8622 static rtx
8623 gen_pop (rtx arg)
8624 {
8625 return gen_rtx_SET (VOIDmode,
8626 arg,
8627 gen_rtx_MEM (Pmode,
8628 gen_rtx_POST_INC (Pmode,
8629 stack_pointer_rtx)));
8630 }
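
/* For illustration, the RTL produced by the two helpers above for a
   hypothetical register REG (shown with Pmode == SImode):

       gen_push (REG):  (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg REG))
       gen_pop  (REG):  (set (reg REG) (mem:SI (post_inc:SI (reg:SI sp))))

   gen_push additionally books the pushed word in the frame-state
   bookkeeping (cfa_offset and sp_offset).  */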
8631
8632 /* Return >= 0 if there is an unused call-clobbered register available
8633 for the entire function. */
8634
8635 static unsigned int
8636 ix86_select_alt_pic_regnum (void)
8637 {
8638 if (current_function_is_leaf
8639 && !crtl->profile
8640 && !ix86_current_function_calls_tls_descriptor)
8641 {
8642 int i, drap;
8643 /* Can't use the same register for both PIC and DRAP. */
8644 if (crtl->drap_reg)
8645 drap = REGNO (crtl->drap_reg);
8646 else
8647 drap = -1;
8648 for (i = 2; i >= 0; --i)
8649 if (i != drap && !df_regs_ever_live_p (i))
8650 return i;
8651 }
8652
8653 return INVALID_REGNUM;
8654 }
8655
8656 /* Return TRUE if we need to save REGNO. */
8657
8658 static bool
8659 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8660 {
8661 if (pic_offset_table_rtx
8662 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8663 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8664 || crtl->profile
8665 || crtl->calls_eh_return
8666 || crtl->uses_const_pool))
8667 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8668
8669 if (crtl->calls_eh_return && maybe_eh_return)
8670 {
8671 unsigned i;
8672 for (i = 0; ; i++)
8673 {
8674 unsigned test = EH_RETURN_DATA_REGNO (i);
8675 if (test == INVALID_REGNUM)
8676 break;
8677 if (test == regno)
8678 return true;
8679 }
8680 }
8681
8682 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8683 return true;
8684
8685 return (df_regs_ever_live_p (regno)
8686 && !call_used_regs[regno]
8687 && !fixed_regs[regno]
8688 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8689 }
8690
8691 /* Return the number of saved general purpose registers. */
8692
8693 static int
8694 ix86_nsaved_regs (void)
8695 {
8696 int nregs = 0;
8697 int regno;
8698
8699 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8700 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8701 nregs ++;
8702 return nregs;
8703 }
8704
8705 /* Return the number of saved SSE registers. */
8706
8707 static int
8708 ix86_nsaved_sseregs (void)
8709 {
8710 int nregs = 0;
8711 int regno;
8712
8713 if (!TARGET_64BIT_MS_ABI)
8714 return 0;
8715 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8716 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8717 nregs ++;
8718 return nregs;
8719 }
8720
8721 /* Given FROM and TO register numbers, say whether this elimination is
8722 allowed. If stack alignment is needed, we can only replace argument
8723 pointer with hard frame pointer, or replace frame pointer with stack
8724 pointer. Otherwise, frame pointer elimination is automatically
8725 handled and all other eliminations are valid. */
8726
8727 static bool
8728 ix86_can_eliminate (const int from, const int to)
8729 {
8730 if (stack_realign_fp)
8731 return ((from == ARG_POINTER_REGNUM
8732 && to == HARD_FRAME_POINTER_REGNUM)
8733 || (from == FRAME_POINTER_REGNUM
8734 && to == STACK_POINTER_REGNUM));
8735 else
8736 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8737 }
8738
8739 /* Return the offset between two registers, one to be eliminated, and the other
8740 its replacement, at the start of a routine. */
8741
8742 HOST_WIDE_INT
8743 ix86_initial_elimination_offset (int from, int to)
8744 {
8745 struct ix86_frame frame;
8746 ix86_compute_frame_layout (&frame);
8747
8748 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8749 return frame.hard_frame_pointer_offset;
8750 else if (from == FRAME_POINTER_REGNUM
8751 && to == HARD_FRAME_POINTER_REGNUM)
8752 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8753 else
8754 {
8755 gcc_assert (to == STACK_POINTER_REGNUM);
8756
8757 if (from == ARG_POINTER_REGNUM)
8758 return frame.stack_pointer_offset;
8759
8760 gcc_assert (from == FRAME_POINTER_REGNUM);
8761 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8762 }
8763 }
8764
8765 /* In a dynamically-aligned function, we can't know the offset from
8766 stack pointer to frame pointer, so we must ensure that setjmp
8767 eliminates fp against the hard fp (%ebp) rather than trying to
8768 index from %esp up to the top of the frame across a gap that is
8769 of unknown (at compile-time) size. */
8770 static rtx
8771 ix86_builtin_setjmp_frame_value (void)
8772 {
8773 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8774 }
8775
8776 /* When using -fsplit-stack, the allocation routines set a field in
8777 the TCB to the bottom of the stack plus this much space, measured
8778 in bytes. */
8779
8780 #define SPLIT_STACK_AVAILABLE 256
8781
8782 /* Fill structure ix86_frame about frame of currently computed function. */
8783
8784 static void
8785 ix86_compute_frame_layout (struct ix86_frame *frame)
8786 {
8787 unsigned int stack_alignment_needed;
8788 HOST_WIDE_INT offset;
8789 unsigned int preferred_alignment;
8790 HOST_WIDE_INT size = get_frame_size ();
8791 HOST_WIDE_INT to_allocate;
8792
8793 frame->nregs = ix86_nsaved_regs ();
8794 frame->nsseregs = ix86_nsaved_sseregs ();
8795
8796 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8797 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8798
8799 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
8800 except for function prologues and leaf functions. */
8801 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8802 && (!current_function_is_leaf || cfun->calls_alloca != 0
8803 || ix86_current_function_calls_tls_descriptor))
8804 {
8805 preferred_alignment = 16;
8806 stack_alignment_needed = 16;
8807 crtl->preferred_stack_boundary = 128;
8808 crtl->stack_alignment_needed = 128;
8809 }
8810
8811 gcc_assert (!size || stack_alignment_needed);
8812 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8813 gcc_assert (preferred_alignment <= stack_alignment_needed);
8814
8815 /* For SEH we have to limit the amount of code movement into the prologue.
8816 At present we do this via a BLOCKAGE, at which point there's very little
8817 scheduling that can be done, which means that there's very little point
8818 in doing anything except PUSHs. */
8819 if (TARGET_SEH)
8820 cfun->machine->use_fast_prologue_epilogue = false;
8821
8822 /* During reload iteration the number of registers saved can change.
8823 Recompute the value as needed. Do not recompute when the number of registers
8824 didn't change, as reload does multiple calls to the function and does not
8825 expect the decision to change within a single iteration. */
8826 else if (!optimize_function_for_size_p (cfun)
8827 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8828 {
8829 int count = frame->nregs;
8830 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8831
8832 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8833
8834 /* The fast prologue uses move instead of push to save registers. This
8835 is significantly longer, but also executes faster as modern hardware
8836 can execute the moves in parallel, but can't do that for push/pop.
8837
8838 Be careful about choosing which prologue to emit: when the function takes
8839 many instructions to execute, we may as well use the slow version, and
8840 likewise when the function is known to be outside a hot spot (this is
8841 known with feedback only). Weight the size of the function by the number
8842 of registers to save, as it is cheap to use one or two push instructions
8843 but very slow to use many of them. */
8844 if (count)
8845 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8846 if (node->frequency < NODE_FREQUENCY_NORMAL
8847 || (flag_branch_probabilities
8848 && node->frequency < NODE_FREQUENCY_HOT))
8849 cfun->machine->use_fast_prologue_epilogue = false;
8850 else
8851 cfun->machine->use_fast_prologue_epilogue
8852 = !expensive_function_p (count);
8853 }
8854
8855 frame->save_regs_using_mov
8856 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8857 /* If static stack checking is enabled and done with probes,
8858 the registers need to be saved before allocating the frame. */
8859 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8860
8861 /* Skip return address. */
8862 offset = UNITS_PER_WORD;
8863
8864 /* Skip pushed static chain. */
8865 if (ix86_static_chain_on_stack)
8866 offset += UNITS_PER_WORD;
8867
8868 /* Skip saved base pointer. */
8869 if (frame_pointer_needed)
8870 offset += UNITS_PER_WORD;
8871 frame->hfp_save_offset = offset;
8872
8873 /* The traditional frame pointer location is at the top of the frame. */
8874 frame->hard_frame_pointer_offset = offset;
8875
8876 /* Register save area */
8877 offset += frame->nregs * UNITS_PER_WORD;
8878 frame->reg_save_offset = offset;
8879
8880 /* Align and set SSE register save area. */
8881 if (frame->nsseregs)
8882 {
8883 /* The only ABI that has saved SSE registers (Win64) also has a
8884 16-byte aligned default stack, and thus we don't need to be
8885 within the re-aligned local stack frame to save them. */
8886 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8887 offset = (offset + 16 - 1) & -16;
8888 offset += frame->nsseregs * 16;
8889 }
8890 frame->sse_reg_save_offset = offset;
8891
8892 /* The re-aligned stack starts here. Values before this point are not
8893 directly comparable with values below this point. In order to make
8894 sure that no value happens to be the same before and after, force
8895 the alignment computation below to add a non-zero value. */
8896 if (stack_realign_fp)
8897 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8898
8899 /* Va-arg area */
8900 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8901 offset += frame->va_arg_size;
8902
8903 /* Align start of frame for local function. */
8904 if (stack_realign_fp
8905 || offset != frame->sse_reg_save_offset
8906 || size != 0
8907 || !current_function_is_leaf
8908 || cfun->calls_alloca
8909 || ix86_current_function_calls_tls_descriptor)
8910 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8911
8912 /* Frame pointer points here. */
8913 frame->frame_pointer_offset = offset;
8914
8915 offset += size;
8916
8917 /* Add outgoing arguments area. Can be skipped if we eliminated
8918 all the function calls as dead code.
8919 Skipping is however impossible when the function calls alloca. The alloca
8920 expander assumes that the last crtl->outgoing_args_size bytes
8921 of the stack frame are unused. */
8922 if (ACCUMULATE_OUTGOING_ARGS
8923 && (!current_function_is_leaf || cfun->calls_alloca
8924 || ix86_current_function_calls_tls_descriptor))
8925 {
8926 offset += crtl->outgoing_args_size;
8927 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8928 }
8929 else
8930 frame->outgoing_arguments_size = 0;
8931
8932 /* Align stack boundary. Only needed if we're calling another function
8933 or using alloca. */
8934 if (!current_function_is_leaf || cfun->calls_alloca
8935 || ix86_current_function_calls_tls_descriptor)
8936 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8937
8938 /* We've reached end of stack frame. */
8939 frame->stack_pointer_offset = offset;
8940
8941 /* Size prologue needs to allocate. */
8942 to_allocate = offset - frame->sse_reg_save_offset;
8943
8944 if ((!to_allocate && frame->nregs <= 1)
8945 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8946 frame->save_regs_using_mov = false;
8947
8948 if (ix86_using_red_zone ()
8949 && current_function_sp_is_unchanging
8950 && current_function_is_leaf
8951 && !ix86_current_function_calls_tls_descriptor)
8952 {
8953 frame->red_zone_size = to_allocate;
8954 if (frame->save_regs_using_mov)
8955 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8956 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8957 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8958 }
8959 else
8960 frame->red_zone_size = 0;
8961 frame->stack_pointer_offset -= frame->red_zone_size;
8962
8963 /* The SEH frame pointer location is near the bottom of the frame.
8964 This is enforced by the fact that the difference between the
8965 stack pointer and the frame pointer is limited to 240 bytes in
8966 the unwind data structure. */
8967 if (TARGET_SEH)
8968 {
8969 HOST_WIDE_INT diff;
8970
8971 /* If we can leave the frame pointer where it is, do so. */
8972 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
8973 if (diff > 240 || (diff & 15) != 0)
8974 {
8975 /* Ideally we'd determine what portion of the local stack frame
8976 (within the constraint of the lowest 240) is most heavily used.
8977 But without that complication, simply bias the frame pointer
8978 by 128 bytes so as to maximize the amount of the local stack
8979 frame that is addressable with 8-bit offsets. */
8980 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
8981 }
8982 }
8983 }
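/* A rough, hypothetical example of the layout computed above: a 64-bit
   SysV, non-leaf function with a frame pointer, one other saved GPR (%rbx),
   32 bytes of locals, 16-byte stack_alignment_needed, and no SSE saves,
   varargs area or accumulated outgoing-argument area would get roughly

     return address            offset  8
     hard_frame_pointer_offset offset 16   (saved %rbp)
     reg_save_offset           offset 24   (saved %rbx)
     frame_pointer_offset      offset 32
     stack_pointer_offset      offset 64   (32 bytes of locals below offset 32)

   all measured downward from the incoming CFA.  */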
8984
8985 /* This is semi-inlined memory_address_length, but simplified
8986 since we know that we're always dealing with reg+offset, and
8987 to avoid having to create and discard all that rtl. */
8988
8989 static inline int
8990 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8991 {
8992 int len = 4;
8993
8994 if (offset == 0)
8995 {
8996 /* EBP and R13 cannot be encoded without an offset. */
8997 len = (regno == BP_REG || regno == R13_REG);
8998 }
8999 else if (IN_RANGE (offset, -128, 127))
9000 len = 1;
9001
9002 /* ESP and R12 must be encoded with a SIB byte. */
9003 if (regno == SP_REG || regno == R12_REG)
9004 len++;
9005
9006 return len;
9007 }
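/* For illustration, the values returned above are roughly the extra
   displacement/SIB bytes needed beyond the opcode and ModRM byte:

     choose_baseaddr_len (AX_REG,  0)  == 0   no displacement needed
     choose_baseaddr_len (BP_REG,  0)  == 1   a disp8 of zero is required
     choose_baseaddr_len (SP_REG,  0)  == 1   SIB byte required
     choose_baseaddr_len (AX_REG,  8)  == 1   disp8
     choose_baseaddr_len (AX_REG, 512) == 4   disp32
     choose_baseaddr_len (SP_REG, 512) == 5   SIB byte + disp32  */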
9008
9009 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9010 The valid base registers are taken from CFUN->MACHINE->FS. */
9011
9012 static rtx
9013 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9014 {
9015 const struct machine_function *m = cfun->machine;
9016 rtx base_reg = NULL;
9017 HOST_WIDE_INT base_offset = 0;
9018
9019 if (m->use_fast_prologue_epilogue)
9020 {
9021 /* Choose the base register most likely to allow the most scheduling
9022 opportunities. Generally FP is valid throughout the function,
9023 while DRAP must be reloaded within the epilogue. But choose either
9024 over the SP due to increased encoding size. */
9025
9026 if (m->fs.fp_valid)
9027 {
9028 base_reg = hard_frame_pointer_rtx;
9029 base_offset = m->fs.fp_offset - cfa_offset;
9030 }
9031 else if (m->fs.drap_valid)
9032 {
9033 base_reg = crtl->drap_reg;
9034 base_offset = 0 - cfa_offset;
9035 }
9036 else if (m->fs.sp_valid)
9037 {
9038 base_reg = stack_pointer_rtx;
9039 base_offset = m->fs.sp_offset - cfa_offset;
9040 }
9041 }
9042 else
9043 {
9044 HOST_WIDE_INT toffset;
9045 int len = 16, tlen;
9046
9047 /* Choose the base register with the smallest address encoding.
9048 With a tie, choose FP > DRAP > SP. */
9049 if (m->fs.sp_valid)
9050 {
9051 base_reg = stack_pointer_rtx;
9052 base_offset = m->fs.sp_offset - cfa_offset;
9053 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9054 }
9055 if (m->fs.drap_valid)
9056 {
9057 toffset = 0 - cfa_offset;
9058 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9059 if (tlen <= len)
9060 {
9061 base_reg = crtl->drap_reg;
9062 base_offset = toffset;
9063 len = tlen;
9064 }
9065 }
9066 if (m->fs.fp_valid)
9067 {
9068 toffset = m->fs.fp_offset - cfa_offset;
9069 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9070 if (tlen <= len)
9071 {
9072 base_reg = hard_frame_pointer_rtx;
9073 base_offset = toffset;
9074 len = tlen;
9075 }
9076 }
9077 }
9078 gcc_assert (base_reg != NULL);
9079
9080 return plus_constant (base_reg, base_offset);
9081 }
9082
9083 /* Emit code to save registers in the prologue. */
9084
9085 static void
9086 ix86_emit_save_regs (void)
9087 {
9088 unsigned int regno;
9089 rtx insn;
9090
9091 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9092 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9093 {
9094 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9095 RTX_FRAME_RELATED_P (insn) = 1;
9096 }
9097 }
9098
9099 /* Emit a single register save at CFA - CFA_OFFSET. */
9100
9101 static void
9102 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9103 HOST_WIDE_INT cfa_offset)
9104 {
9105 struct machine_function *m = cfun->machine;
9106 rtx reg = gen_rtx_REG (mode, regno);
9107 rtx mem, addr, base, insn;
9108
9109 addr = choose_baseaddr (cfa_offset);
9110 mem = gen_frame_mem (mode, addr);
9111
9112 /* For SSE saves, we need to indicate the 128-bit alignment. */
9113 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9114
9115 insn = emit_move_insn (mem, reg);
9116 RTX_FRAME_RELATED_P (insn) = 1;
9117
9118 base = addr;
9119 if (GET_CODE (base) == PLUS)
9120 base = XEXP (base, 0);
9121 gcc_checking_assert (REG_P (base));
9122
9123 /* When saving registers into a re-aligned local stack frame, avoid
9124 any tricky guessing by dwarf2out. */
9125 if (m->fs.realigned)
9126 {
9127 gcc_checking_assert (stack_realign_drap);
9128
9129 if (regno == REGNO (crtl->drap_reg))
9130 {
9131 /* A bit of a hack. We force the DRAP register to be saved in
9132 the re-aligned stack frame, which provides us with a copy
9133 of the CFA that will last past the prologue. Install it. */
9134 gcc_checking_assert (cfun->machine->fs.fp_valid);
9135 addr = plus_constant (hard_frame_pointer_rtx,
9136 cfun->machine->fs.fp_offset - cfa_offset);
9137 mem = gen_rtx_MEM (mode, addr);
9138 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9139 }
9140 else
9141 {
9142 /* The frame pointer is a stable reference within the
9143 aligned frame. Use it. */
9144 gcc_checking_assert (cfun->machine->fs.fp_valid);
9145 addr = plus_constant (hard_frame_pointer_rtx,
9146 cfun->machine->fs.fp_offset - cfa_offset);
9147 mem = gen_rtx_MEM (mode, addr);
9148 add_reg_note (insn, REG_CFA_EXPRESSION,
9149 gen_rtx_SET (VOIDmode, mem, reg));
9150 }
9151 }
9152
9153 /* The memory may not be relative to the current CFA register,
9154 which means that we may need to generate a new pattern for
9155 use by the unwind info. */
9156 else if (base != m->fs.cfa_reg)
9157 {
9158 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9159 mem = gen_rtx_MEM (mode, addr);
9160 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9161 }
9162 }
9163
9164 /* Emit code to save registers using MOV insns.
9165 First register is stored at CFA - CFA_OFFSET. */
9166 static void
9167 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9168 {
9169 unsigned int regno;
9170
9171 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9172 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9173 {
9174 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9175 cfa_offset -= UNITS_PER_WORD;
9176 }
9177 }
9178
9179 /* Emit code to save SSE registers using MOV insns.
9180 First register is stored at CFA - CFA_OFFSET. */
9181 static void
9182 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9183 {
9184 unsigned int regno;
9185
9186 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9187 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9188 {
9189 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9190 cfa_offset -= 16;
9191 }
9192 }
9193
9194 static GTY(()) rtx queued_cfa_restores;
9195
9196 /* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the next
9197 stack manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9198 Don't add the note if the previously saved value will be left untouched
9199 within the stack red zone until return, as unwinders can find the same
9200 value in the register and on the stack. */
9201
9202 static void
9203 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9204 {
9205 if (!crtl->shrink_wrapped
9206 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9207 return;
9208
9209 if (insn)
9210 {
9211 add_reg_note (insn, REG_CFA_RESTORE, reg);
9212 RTX_FRAME_RELATED_P (insn) = 1;
9213 }
9214 else
9215 queued_cfa_restores
9216 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9217 }
9218
9219 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9220
9221 static void
9222 ix86_add_queued_cfa_restore_notes (rtx insn)
9223 {
9224 rtx last;
9225 if (!queued_cfa_restores)
9226 return;
9227 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9228 ;
9229 XEXP (last, 1) = REG_NOTES (insn);
9230 REG_NOTES (insn) = queued_cfa_restores;
9231 queued_cfa_restores = NULL_RTX;
9232 RTX_FRAME_RELATED_P (insn) = 1;
9233 }
9234
9235 /* Expand prologue or epilogue stack adjustment.
9236 The pattern exists to put a dependency on all ebp-based memory accesses.
9237 STYLE should be negative if instructions should be marked as frame related,
9238 zero if the %r11 register is live and cannot be freely used, and positive
9239 otherwise. */
9240
9241 static void
9242 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9243 int style, bool set_cfa)
9244 {
9245 struct machine_function *m = cfun->machine;
9246 rtx insn;
9247 bool add_frame_related_expr = false;
9248
9249 if (! TARGET_64BIT)
9250 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9251 else if (x86_64_immediate_operand (offset, DImode))
9252 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9253 else
9254 {
9255 rtx tmp;
9256 /* r11 is used by indirect sibcall return as well, set before the
9257 epilogue and used after the epilogue. */
9258 if (style)
9259 tmp = gen_rtx_REG (DImode, R11_REG);
9260 else
9261 {
9262 gcc_assert (src != hard_frame_pointer_rtx
9263 && dest != hard_frame_pointer_rtx);
9264 tmp = hard_frame_pointer_rtx;
9265 }
9266 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9267 if (style < 0)
9268 add_frame_related_expr = true;
9269
9270 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9271 }
9272
9273 insn = emit_insn (insn);
9274 if (style >= 0)
9275 ix86_add_queued_cfa_restore_notes (insn);
9276
9277 if (set_cfa)
9278 {
9279 rtx r;
9280
9281 gcc_assert (m->fs.cfa_reg == src);
9282 m->fs.cfa_offset += INTVAL (offset);
9283 m->fs.cfa_reg = dest;
9284
9285 r = gen_rtx_PLUS (Pmode, src, offset);
9286 r = gen_rtx_SET (VOIDmode, dest, r);
9287 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9288 RTX_FRAME_RELATED_P (insn) = 1;
9289 }
9290 else if (style < 0)
9291 {
9292 RTX_FRAME_RELATED_P (insn) = 1;
9293 if (add_frame_related_expr)
9294 {
9295 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9296 r = gen_rtx_SET (VOIDmode, dest, r);
9297 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9298 }
9299 }
9300
9301 if (dest == stack_pointer_rtx)
9302 {
9303 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9304 bool valid = m->fs.sp_valid;
9305
9306 if (src == hard_frame_pointer_rtx)
9307 {
9308 valid = m->fs.fp_valid;
9309 ooffset = m->fs.fp_offset;
9310 }
9311 else if (src == crtl->drap_reg)
9312 {
9313 valid = m->fs.drap_valid;
9314 ooffset = 0;
9315 }
9316 else
9317 {
9318 /* Else there are two possibilities: SP itself, which we set
9319 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9320 taken care of by hand along the eh_return path. */
9321 gcc_checking_assert (src == stack_pointer_rtx
9322 || offset == const0_rtx);
9323 }
9324
9325 m->fs.sp_offset = ooffset - INTVAL (offset);
9326 m->fs.sp_valid = valid;
9327 }
9328 }
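/* For illustration, a typical prologue call such as

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-allocate), -1,
				m->fs.cfa_reg == stack_pointer_rtx);

   emits a single "sub $allocate, %esp/%rsp" style insn via the
   pro_epilogue_adjust_stack_{si,di}_add pattern, marks it frame related
   (STYLE == -1), and updates m->fs.sp_offset (and, while the stack pointer
   is still the CFA register, m->fs.cfa_offset) accordingly.  */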
9329
9330 /* Find an available register to be used as the dynamic realign argument
9331 pointer register. Such a register will be written in the prologue and
9332 used at the beginning of the body, so it must not be
9333 1. parameter passing register.
9334 2. GOT pointer.
9335 We reuse static-chain register if it is available. Otherwise, we
9336 use DI for i386 and R13 for x86-64. We chose R13 since it has
9337 shorter encoding.
9338
9339 Return: the regno of chosen register. */
9340
9341 static unsigned int
9342 find_drap_reg (void)
9343 {
9344 tree decl = cfun->decl;
9345
9346 if (TARGET_64BIT)
9347 {
9348 /* Use R13 for nested functions or functions that need a static
9349 chain. Since a function with a tail call may use any caller-saved
9350 registers in its epilogue, DRAP must not use a caller-saved
9351 register in such a case. */
9352 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9353 return R13_REG;
9354
9355 return R10_REG;
9356 }
9357 else
9358 {
9359 /* Use DI for nested functions or functions that need a static
9360 chain. Since a function with a tail call may use any caller-saved
9361 registers in its epilogue, DRAP must not use a caller-saved
9362 register in such a case. */
9363 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9364 return DI_REG;
9365
9366 /* Reuse static chain register if it isn't used for parameter
9367 passing. */
9368 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9369 {
9370 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9371 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9372 return CX_REG;
9373 }
9374 return DI_REG;
9375 }
9376 }
9377
9378 /* Return minimum incoming stack alignment. */
9379
9380 static unsigned int
9381 ix86_minimum_incoming_stack_boundary (bool sibcall)
9382 {
9383 unsigned int incoming_stack_boundary;
9384
9385 /* Prefer the one specified at command line. */
9386 if (ix86_user_incoming_stack_boundary)
9387 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9388 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9389 if -mstackrealign is used, this isn't for a sibcall check, and the
9390 estimated stack alignment is 128 bits. */
9391 else if (!sibcall
9392 && !TARGET_64BIT
9393 && ix86_force_align_arg_pointer
9394 && crtl->stack_alignment_estimated == 128)
9395 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9396 else
9397 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9398
9399 /* Incoming stack alignment can be changed on individual functions
9400 via force_align_arg_pointer attribute. We use the smallest
9401 incoming stack boundary. */
9402 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9403 && lookup_attribute (ix86_force_align_arg_pointer_string,
9404 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9405 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9406
9407 /* The incoming stack frame has to be aligned at least at
9408 parm_stack_boundary. */
9409 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9410 incoming_stack_boundary = crtl->parm_stack_boundary;
9411
9412 /* Stack at entrance of main is aligned by runtime. We use the
9413 smallest incoming stack boundary. */
9414 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9415 && DECL_NAME (current_function_decl)
9416 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9417 && DECL_FILE_SCOPE_P (current_function_decl))
9418 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9419
9420 return incoming_stack_boundary;
9421 }
9422
9423 /* Update incoming stack boundary and estimated stack alignment. */
9424
9425 static void
9426 ix86_update_stack_boundary (void)
9427 {
9428 ix86_incoming_stack_boundary
9429 = ix86_minimum_incoming_stack_boundary (false);
9430
9431 /* x86_64 varargs need 16-byte stack alignment for the register save
9432 area. */
9433 if (TARGET_64BIT
9434 && cfun->stdarg
9435 && crtl->stack_alignment_estimated < 128)
9436 crtl->stack_alignment_estimated = 128;
9437 }
9438
9439 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9440 needed or an rtx for DRAP otherwise. */
9441
9442 static rtx
9443 ix86_get_drap_rtx (void)
9444 {
9445 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9446 crtl->need_drap = true;
9447
9448 if (stack_realign_drap)
9449 {
9450 /* Assign DRAP to vDRAP and return vDRAP. */
9451 unsigned int regno = find_drap_reg ();
9452 rtx drap_vreg;
9453 rtx arg_ptr;
9454 rtx seq, insn;
9455
9456 arg_ptr = gen_rtx_REG (Pmode, regno);
9457 crtl->drap_reg = arg_ptr;
9458
9459 start_sequence ();
9460 drap_vreg = copy_to_reg (arg_ptr);
9461 seq = get_insns ();
9462 end_sequence ();
9463
9464 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9465 if (!optimize)
9466 {
9467 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9468 RTX_FRAME_RELATED_P (insn) = 1;
9469 }
9470 return drap_vreg;
9471 }
9472 else
9473 return NULL;
9474 }
9475
9476 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9477
9478 static rtx
9479 ix86_internal_arg_pointer (void)
9480 {
9481 return virtual_incoming_args_rtx;
9482 }
9483
9484 struct scratch_reg {
9485 rtx reg;
9486 bool saved;
9487 };
9488
9489 /* Return a short-lived scratch register for use on function entry.
9490 In 32-bit mode, it is valid only after the registers are saved
9491 in the prologue. This register must be released by means of
9492 release_scratch_register_on_entry once it is dead. */
9493
9494 static void
9495 get_scratch_register_on_entry (struct scratch_reg *sr)
9496 {
9497 int regno;
9498
9499 sr->saved = false;
9500
9501 if (TARGET_64BIT)
9502 {
9503 /* We always use R11 in 64-bit mode. */
9504 regno = R11_REG;
9505 }
9506 else
9507 {
9508 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9509 bool fastcall_p
9510 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9511 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9512 int regparm = ix86_function_regparm (fntype, decl);
9513 int drap_regno
9514 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9515
9516 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9517 for the static chain register. */
9518 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9519 && drap_regno != AX_REG)
9520 regno = AX_REG;
9521 else if (regparm < 2 && drap_regno != DX_REG)
9522 regno = DX_REG;
9523 /* ecx is the static chain register. */
9524 else if (regparm < 3 && !fastcall_p && !static_chain_p
9525 && drap_regno != CX_REG)
9526 regno = CX_REG;
9527 else if (ix86_save_reg (BX_REG, true))
9528 regno = BX_REG;
9529 /* esi is the static chain register. */
9530 else if (!(regparm == 3 && static_chain_p)
9531 && ix86_save_reg (SI_REG, true))
9532 regno = SI_REG;
9533 else if (ix86_save_reg (DI_REG, true))
9534 regno = DI_REG;
9535 else
9536 {
9537 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9538 sr->saved = true;
9539 }
9540 }
9541
9542 sr->reg = gen_rtx_REG (Pmode, regno);
9543 if (sr->saved)
9544 {
9545 rtx insn = emit_insn (gen_push (sr->reg));
9546 RTX_FRAME_RELATED_P (insn) = 1;
9547 }
9548 }
9549
9550 /* Release a scratch register obtained from the preceding function. */
9551
9552 static void
9553 release_scratch_register_on_entry (struct scratch_reg *sr)
9554 {
9555 if (sr->saved)
9556 {
9557 rtx x, insn = emit_insn (gen_pop (sr->reg));
9558
9559 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9560 RTX_FRAME_RELATED_P (insn) = 1;
9561 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9562 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9563 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9564 }
9565 }
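/* For illustration: on 64-bit targets the scratch register is always %r11.
   On 32-bit, for a plain cdecl function with no register parameters, no
   DRAP and no static chain, %eax is chosen; when no candidate register is
   free, one is pushed first, SR->SAVED is set, and
   release_scratch_register_on_entry pops it back.  */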
9566
9567 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9568
9569 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9570
9571 static void
9572 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9573 {
9574 /* We skip the probe for the first interval + a small dope of 4 words and
9575 probe that many bytes past the specified size to maintain a protection
9576 area at the bottom of the stack. */
9577 const int dope = 4 * UNITS_PER_WORD;
9578 rtx size_rtx = GEN_INT (size), last;
9579
9580 /* See if we have a constant small number of probes to generate. If so,
9581 that's the easy case. The run-time loop is made up of 11 insns in the
9582 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9583 for n # of intervals. */
9584 if (size <= 5 * PROBE_INTERVAL)
9585 {
9586 HOST_WIDE_INT i, adjust;
9587 bool first_probe = true;
9588
9589 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9590 values of N from 1 until it exceeds SIZE. If only one probe is
9591 needed, this will not generate any code. Then adjust and probe
9592 to PROBE_INTERVAL + SIZE. */
9593 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9594 {
9595 if (first_probe)
9596 {
9597 adjust = 2 * PROBE_INTERVAL + dope;
9598 first_probe = false;
9599 }
9600 else
9601 adjust = PROBE_INTERVAL;
9602
9603 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9604 plus_constant (stack_pointer_rtx, -adjust)));
9605 emit_stack_probe (stack_pointer_rtx);
9606 }
9607
9608 if (first_probe)
9609 adjust = size + PROBE_INTERVAL + dope;
9610 else
9611 adjust = size + PROBE_INTERVAL - i;
9612
9613 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9614 plus_constant (stack_pointer_rtx, -adjust)));
9615 emit_stack_probe (stack_pointer_rtx);
9616
9617 /* Adjust back to account for the additional first interval. */
9618 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9619 plus_constant (stack_pointer_rtx,
9620 PROBE_INTERVAL + dope)));
9621 }
9622
9623 /* Otherwise, do the same as above, but in a loop. Note that we must be
9624 extra careful with variables wrapping around because we might be at
9625 the very top (or the very bottom) of the address space and we have
9626 to be able to handle this case properly; in particular, we use an
9627 equality test for the loop condition. */
9628 else
9629 {
9630 HOST_WIDE_INT rounded_size;
9631 struct scratch_reg sr;
9632
9633 get_scratch_register_on_entry (&sr);
9634
9635
9636 /* Step 1: round SIZE to the previous multiple of the interval. */
9637
9638 rounded_size = size & -PROBE_INTERVAL;
9639
9640
9641 /* Step 2: compute initial and final value of the loop counter. */
9642
9643 /* SP = SP_0 + PROBE_INTERVAL. */
9644 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9645 plus_constant (stack_pointer_rtx,
9646 - (PROBE_INTERVAL + dope))));
9647
9648 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9649 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9650 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9651 gen_rtx_PLUS (Pmode, sr.reg,
9652 stack_pointer_rtx)));
9653
9654
9655 /* Step 3: the loop
9656
9657 while (SP != LAST_ADDR)
9658 {
9659 SP = SP + PROBE_INTERVAL
9660 probe at SP
9661 }
9662
9663 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9664 values of N from 1 until it is equal to ROUNDED_SIZE. */
9665
9666 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9667
9668
9669 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9670 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9671
9672 if (size != rounded_size)
9673 {
9674 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9675 plus_constant (stack_pointer_rtx,
9676 rounded_size - size)));
9677 emit_stack_probe (stack_pointer_rtx);
9678 }
9679
9680 /* Adjust back to account for the additional first interval. */
9681 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9682 plus_constant (stack_pointer_rtx,
9683 PROBE_INTERVAL + dope)));
9684
9685 release_scratch_register_on_entry (&sr);
9686 }
9687
9688 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9689
9690 /* Even if the stack pointer isn't the CFA register, we need to correctly
9691 describe the adjustments made to it, in particular differentiate the
9692 frame-related ones from the frame-unrelated ones. */
9693 if (size > 0)
9694 {
9695 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9696 XVECEXP (expr, 0, 0)
9697 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9698 plus_constant (stack_pointer_rtx, -size));
9699 XVECEXP (expr, 0, 1)
9700 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9701 plus_constant (stack_pointer_rtx,
9702 PROBE_INTERVAL + dope + size));
9703 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9704 RTX_FRAME_RELATED_P (last) = 1;
9705
9706 cfun->machine->fs.sp_offset += size;
9707 }
9708
9709 /* Make sure nothing is scheduled before we are done. */
9710 emit_insn (gen_blockage ());
9711 }
9712
9713 /* Adjust the stack pointer up to REG while probing it. */
9714
9715 const char *
9716 output_adjust_stack_and_probe (rtx reg)
9717 {
9718 static int labelno = 0;
9719 char loop_lab[32], end_lab[32];
9720 rtx xops[2];
9721
9722 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9723 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9724
9725 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9726
9727 /* Jump to END_LAB if SP == LAST_ADDR. */
9728 xops[0] = stack_pointer_rtx;
9729 xops[1] = reg;
9730 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9731 fputs ("\tje\t", asm_out_file);
9732 assemble_name_raw (asm_out_file, end_lab);
9733 fputc ('\n', asm_out_file);
9734
9735 /* SP = SP + PROBE_INTERVAL. */
9736 xops[1] = GEN_INT (PROBE_INTERVAL);
9737 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9738
9739 /* Probe at SP. */
9740 xops[1] = const0_rtx;
9741 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9742
9743 fprintf (asm_out_file, "\tjmp\t");
9744 assemble_name_raw (asm_out_file, loop_lab);
9745 fputc ('\n', asm_out_file);
9746
9747 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9748
9749 return "";
9750 }
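/* For illustration, assuming the default 4096-byte probe interval and %r11
   as the scratch register holding LAST_ADDR, the loop emitted above looks
   roughly like

     .LPSRL0:
	     cmpq    %r11, %rsp
	     je      .LPSRE0
	     subq    $4096, %rsp
	     orq     $0, (%rsp)
	     jmp     .LPSRL0
     .LPSRE0:                                                              */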
9751
9752 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9753 inclusive. These are offsets from the current stack pointer. */
9754
9755 static void
9756 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9757 {
9758 /* See if we have a constant small number of probes to generate. If so,
9759 that's the easy case. The run-time loop is made up of 7 insns in the
9760 generic case while the compile-time loop is made up of n insns for n #
9761 of intervals. */
9762 if (size <= 7 * PROBE_INTERVAL)
9763 {
9764 HOST_WIDE_INT i;
9765
9766 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9767 it exceeds SIZE. If only one probe is needed, this will not
9768 generate any code. Then probe at FIRST + SIZE. */
9769 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9770 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9771
9772 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9773 }
9774
9775 /* Otherwise, do the same as above, but in a loop. Note that we must be
9776 extra careful with variables wrapping around because we might be at
9777 the very top (or the very bottom) of the address space and we have
9778 to be able to handle this case properly; in particular, we use an
9779 equality test for the loop condition. */
9780 else
9781 {
9782 HOST_WIDE_INT rounded_size, last;
9783 struct scratch_reg sr;
9784
9785 get_scratch_register_on_entry (&sr);
9786
9787
9788 /* Step 1: round SIZE to the previous multiple of the interval. */
9789
9790 rounded_size = size & -PROBE_INTERVAL;
9791
9792
9793 /* Step 2: compute initial and final value of the loop counter. */
9794
9795 /* TEST_OFFSET = FIRST. */
9796 emit_move_insn (sr.reg, GEN_INT (-first));
9797
9798 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9799 last = first + rounded_size;
9800
9801
9802 /* Step 3: the loop
9803
9804 while (TEST_ADDR != LAST_ADDR)
9805 {
9806 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9807 probe at TEST_ADDR
9808 }
9809
9810 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9811 until it is equal to ROUNDED_SIZE. */
9812
9813 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9814
9815
9816 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9817 that SIZE is equal to ROUNDED_SIZE. */
9818
9819 if (size != rounded_size)
9820 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9821 stack_pointer_rtx,
9822 sr.reg),
9823 rounded_size - size));
9824
9825 release_scratch_register_on_entry (&sr);
9826 }
9827
9828 /* Make sure nothing is scheduled before we are done. */
9829 emit_insn (gen_blockage ());
9830 }
9831
9832 /* Probe a range of stack addresses from REG to END, inclusive. These are
9833 offsets from the current stack pointer. */
9834
9835 const char *
9836 output_probe_stack_range (rtx reg, rtx end)
9837 {
9838 static int labelno = 0;
9839 char loop_lab[32], end_lab[32];
9840 rtx xops[3];
9841
9842 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9843 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9844
9845 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9846
9847 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9848 xops[0] = reg;
9849 xops[1] = end;
9850 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9851 fputs ("\tje\t", asm_out_file);
9852 assemble_name_raw (asm_out_file, end_lab);
9853 fputc ('\n', asm_out_file);
9854
9855 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9856 xops[1] = GEN_INT (PROBE_INTERVAL);
9857 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9858
9859 /* Probe at TEST_ADDR. */
9860 xops[0] = stack_pointer_rtx;
9861 xops[1] = reg;
9862 xops[2] = const0_rtx;
9863 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9864
9865 fprintf (asm_out_file, "\tjmp\t");
9866 assemble_name_raw (asm_out_file, loop_lab);
9867 fputc ('\n', asm_out_file);
9868
9869 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9870
9871 return "";
9872 }
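/* For illustration, on 32-bit with %eax holding the (negative) test offset,
   a hypothetical LAST_OFFSET of -20480, and the default 4096-byte interval,
   the emitted loop is roughly

     .LPSRL1:
	     cmpl    $-20480, %eax
	     je      .LPSRE1
	     subl    $4096, %eax
	     orl     $0, (%esp,%eax)
	     jmp     .LPSRL1
     .LPSRE1:                                                              */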
9873
9874 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9875 to be generated in correct form. */
9876 static void
9877 ix86_finalize_stack_realign_flags (void)
9878 {
9879 /* Check if stack realignment is really needed after reload, and
9880 store the result in cfun. */
9881 unsigned int incoming_stack_boundary
9882 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9883 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9884 unsigned int stack_realign = (incoming_stack_boundary
9885 < (current_function_is_leaf
9886 ? crtl->max_used_stack_slot_alignment
9887 : crtl->stack_alignment_needed));
9888
9889 if (crtl->stack_realign_finalized)
9890 {
9891 /* After stack_realign_needed is finalized, we can no longer
9892 change it. */
9893 gcc_assert (crtl->stack_realign_needed == stack_realign);
9894 }
9895 else
9896 {
9897 crtl->stack_realign_needed = stack_realign;
9898 crtl->stack_realign_finalized = true;
9899 }
9900 }
9901
9902 /* Expand the prologue into a bunch of separate insns. */
9903
9904 void
9905 ix86_expand_prologue (void)
9906 {
9907 struct machine_function *m = cfun->machine;
9908 rtx insn, t;
9909 bool pic_reg_used;
9910 struct ix86_frame frame;
9911 HOST_WIDE_INT allocate;
9912 bool int_registers_saved;
9913
9914 ix86_finalize_stack_realign_flags ();
9915
9916 /* DRAP should not coexist with stack_realign_fp */
9917 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9918
9919 memset (&m->fs, 0, sizeof (m->fs));
9920
9921 /* Initialize CFA state for before the prologue. */
9922 m->fs.cfa_reg = stack_pointer_rtx;
9923 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9924
9925 /* Track SP offset to the CFA. We continue tracking this after we've
9926 swapped the CFA register away from SP. In the case of re-alignment
9927 this is fudged; we're interested in offsets within the local frame. */
9928 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9929 m->fs.sp_valid = true;
9930
9931 ix86_compute_frame_layout (&frame);
9932
9933 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9934 {
9935 /* We should have already generated an error for any use of
9936 ms_hook on a nested function. */
9937 gcc_checking_assert (!ix86_static_chain_on_stack);
9938
9939 /* Check if profiling is active and we shall use the profiling-before-
9940 prologue variant. If so, issue a sorry. */
9941 if (crtl->profile && flag_fentry != 0)
9942 sorry ("ms_hook_prologue attribute isn%'t compatible "
9943 "with -mfentry for 32-bit");
9944
9945 /* In ix86_asm_output_function_label we emitted:
9946 8b ff movl.s %edi,%edi
9947 55 push %ebp
9948 8b ec movl.s %esp,%ebp
9949
9950 This matches the hookable function prologue in Win32 API
9951 functions in Microsoft Windows XP Service Pack 2 and newer.
9952 Wine uses this to enable Windows apps to hook the Win32 API
9953 functions provided by Wine.
9954
9955 What that means is that we've already set up the frame pointer. */
9956
9957 if (frame_pointer_needed
9958 && !(crtl->drap_reg && crtl->stack_realign_needed))
9959 {
9960 rtx push, mov;
9961
9962 /* We've decided to use the frame pointer already set up.
9963 Describe this to the unwinder by pretending that both
9964 push and mov insns happen right here.
9965
9966 Putting the unwind info here at the end of the ms_hook
9967 is done so that we can make absolutely certain we get
9968 the required byte sequence at the start of the function,
9969 rather than relying on an assembler that can produce
9970 the exact encoding required.
9971
9972 However it does mean (in the unpatched case) that we have
9973 a 1 insn window where the asynchronous unwind info is
9974 incorrect. However, if we placed the unwind info at
9975 its correct location we would have incorrect unwind info
9976 in the patched case. Which is probably all moot since
9977 I don't expect Wine generates dwarf2 unwind info for the
9978 system libraries that use this feature. */
9979
9980 insn = emit_insn (gen_blockage ());
9981
9982 push = gen_push (hard_frame_pointer_rtx);
9983 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9984 stack_pointer_rtx);
9985 RTX_FRAME_RELATED_P (push) = 1;
9986 RTX_FRAME_RELATED_P (mov) = 1;
9987
9988 RTX_FRAME_RELATED_P (insn) = 1;
9989 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9990 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9991
9992 /* Note that gen_push incremented m->fs.cfa_offset, even
9993 though we didn't emit the push insn here. */
9994 m->fs.cfa_reg = hard_frame_pointer_rtx;
9995 m->fs.fp_offset = m->fs.cfa_offset;
9996 m->fs.fp_valid = true;
9997 }
9998 else
9999 {
10000 /* The frame pointer is not needed so pop %ebp again.
10001 This leaves us with a pristine state. */
10002 emit_insn (gen_pop (hard_frame_pointer_rtx));
10003 }
10004 }
10005
10006 /* The first insn of a function that accepts its static chain on the
10007 stack is to push the register that would be filled in by a direct
10008 call. This insn will be skipped by the trampoline. */
10009 else if (ix86_static_chain_on_stack)
10010 {
10011 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10012 emit_insn (gen_blockage ());
10013
10014 /* We don't want to interpret this push insn as a register save,
10015 only as a stack adjustment. The real copy of the register as
10016 a save will be done later, if needed. */
10017 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10018 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10019 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10020 RTX_FRAME_RELATED_P (insn) = 1;
10021 }
10022
10023 /* Emit prologue code to adjust the stack alignment and set up DRAP, in
10024 case DRAP is needed and stack realignment is really needed after reload. */
10025 if (stack_realign_drap)
10026 {
10027 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10028
10029 /* Only need to push parameter pointer reg if it is caller saved. */
10030 if (!call_used_regs[REGNO (crtl->drap_reg)])
10031 {
10032 /* Push arg pointer reg */
10033 insn = emit_insn (gen_push (crtl->drap_reg));
10034 RTX_FRAME_RELATED_P (insn) = 1;
10035 }
10036
10037 /* Grab the argument pointer. */
10038 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10039 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10040 RTX_FRAME_RELATED_P (insn) = 1;
10041 m->fs.cfa_reg = crtl->drap_reg;
10042 m->fs.cfa_offset = 0;
10043
10044 /* Align the stack. */
10045 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10046 stack_pointer_rtx,
10047 GEN_INT (-align_bytes)));
10048 RTX_FRAME_RELATED_P (insn) = 1;
10049
10050 /* Replicate the return address on the stack so that the return
10051 address can be reached via the (argp - 1) slot. This is needed
10052 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10053 expand_builtin_return_addr, etc. */
10054 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10055 t = gen_frame_mem (Pmode, t);
10056 insn = emit_insn (gen_push (t));
10057 RTX_FRAME_RELATED_P (insn) = 1;
10058
10059 /* For the purposes of frame and register save area addressing,
10060 we've started over with a new frame. */
10061 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10062 m->fs.realigned = true;
10063 }
10064
10065 if (frame_pointer_needed && !m->fs.fp_valid)
10066 {
10067 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10068 slower on all targets. Also sdb doesn't like it. */
10069 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10070 RTX_FRAME_RELATED_P (insn) = 1;
10071
10072 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10073 {
10074 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10075 RTX_FRAME_RELATED_P (insn) = 1;
10076
10077 if (m->fs.cfa_reg == stack_pointer_rtx)
10078 m->fs.cfa_reg = hard_frame_pointer_rtx;
10079 m->fs.fp_offset = m->fs.sp_offset;
10080 m->fs.fp_valid = true;
10081 }
10082 }
10083
10084 int_registers_saved = (frame.nregs == 0);
10085
10086 if (!int_registers_saved)
10087 {
10088 /* If saving registers via PUSH, do so now. */
10089 if (!frame.save_regs_using_mov)
10090 {
10091 ix86_emit_save_regs ();
10092 int_registers_saved = true;
10093 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10094 }
10095
10096 /* When using the red zone we may start register saving before allocating
10097 the stack frame, saving one cycle of the prologue. However, avoid
10098 doing this if we have to probe the stack; at least on x86_64 the
10099 stack probe can turn into a call that clobbers a red zone location. */
10100 else if (ix86_using_red_zone ()
10101 && (! TARGET_STACK_PROBE
10102 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10103 {
10104 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10105 int_registers_saved = true;
10106 }
10107 }
10108
10109 if (stack_realign_fp)
10110 {
10111 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10112 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10113
10114 /* The computation of the size of the re-aligned stack frame means
10115 that we must allocate the size of the register save area before
10116 performing the actual alignment. Otherwise we cannot guarantee
10117 that there's enough storage above the realignment point. */
10118 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10119 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10120 GEN_INT (m->fs.sp_offset
10121 - frame.sse_reg_save_offset),
10122 -1, false);
10123
10124 /* Align the stack. */
10125 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10126 stack_pointer_rtx,
10127 GEN_INT (-align_bytes)));
10128
10129 /* For the purposes of register save area addressing, the stack
10130 pointer is no longer valid. As for the value of sp_offset,
10131 see ix86_compute_frame_layout, which we need to match in order
10132 to pass verification of stack_pointer_offset at the end. */
10133 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10134 m->fs.sp_valid = false;
10135 }
10136
10137 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10138
10139 if (flag_stack_usage_info)
10140 {
10141 /* We start to count from ARG_POINTER. */
10142 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10143
10144 /* If it was realigned, take into account the fake frame. */
10145 if (stack_realign_drap)
10146 {
10147 if (ix86_static_chain_on_stack)
10148 stack_size += UNITS_PER_WORD;
10149
10150 if (!call_used_regs[REGNO (crtl->drap_reg)])
10151 stack_size += UNITS_PER_WORD;
10152
10153 /* This over-estimates by 1 minimal-stack-alignment-unit but
10154 mitigates that by counting in the new return address slot. */
10155 current_function_dynamic_stack_size
10156 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10157 }
10158
10159 current_function_static_stack_size = stack_size;
10160 }
10161
10162 /* The stack has already been decremented by the instruction calling us
10163 so probe if the size is non-negative to preserve the protection area. */
10164 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10165 {
10166 /* We expect the registers to be saved when probes are used. */
10167 gcc_assert (int_registers_saved);
10168
10169 if (STACK_CHECK_MOVING_SP)
10170 {
10171 ix86_adjust_stack_and_probe (allocate);
10172 allocate = 0;
10173 }
10174 else
10175 {
10176 HOST_WIDE_INT size = allocate;
10177
10178 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10179 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10180
10181 if (TARGET_STACK_PROBE)
10182 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10183 else
10184 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10185 }
10186 }
10187
10188 if (allocate == 0)
10189 ;
10190 else if (!ix86_target_stack_probe ()
10191 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10192 {
10193 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10194 GEN_INT (-allocate), -1,
10195 m->fs.cfa_reg == stack_pointer_rtx);
10196 }
10197 else
10198 {
10199 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10200 rtx r10 = NULL;
10201 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10202
10203 bool eax_live = false;
10204 bool r10_live = false;
10205
10206 if (TARGET_64BIT)
10207 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10208 if (!TARGET_64BIT_MS_ABI)
10209 eax_live = ix86_eax_live_at_start_p ();
10210
10211 if (eax_live)
10212 {
10213 emit_insn (gen_push (eax));
10214 allocate -= UNITS_PER_WORD;
10215 }
10216 if (r10_live)
10217 {
10218 r10 = gen_rtx_REG (Pmode, R10_REG);
10219 emit_insn (gen_push (r10));
10220 allocate -= UNITS_PER_WORD;
10221 }
10222
10223 emit_move_insn (eax, GEN_INT (allocate));
10224 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10225
10226 /* Use the fact that AX still contains ALLOCATE. */
10227 adjust_stack_insn = (TARGET_64BIT
10228 ? gen_pro_epilogue_adjust_stack_di_sub
10229 : gen_pro_epilogue_adjust_stack_si_sub);
10230
10231 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10232 stack_pointer_rtx, eax));
10233
10234 /* Note that SEH directives need to continue tracking the stack
10235 pointer even after the frame pointer has been set up. */
10236 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10237 {
10238 if (m->fs.cfa_reg == stack_pointer_rtx)
10239 m->fs.cfa_offset += allocate;
10240
10241 RTX_FRAME_RELATED_P (insn) = 1;
10242 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10243 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10244 plus_constant (stack_pointer_rtx,
10245 -allocate)));
10246 }
10247 m->fs.sp_offset += allocate;
10248
10249 if (r10_live && eax_live)
10250 {
10251 t = choose_baseaddr (m->fs.sp_offset - allocate);
10252 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10253 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10254 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10255 }
10256 else if (eax_live || r10_live)
10257 {
10258 t = choose_baseaddr (m->fs.sp_offset - allocate);
10259 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10260 }
10261 }
10262 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10263
10264 /* If we haven't already set up the frame pointer, do so now. */
10265 if (frame_pointer_needed && !m->fs.fp_valid)
10266 {
10267 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10268 GEN_INT (frame.stack_pointer_offset
10269 - frame.hard_frame_pointer_offset));
10270 insn = emit_insn (insn);
10271 RTX_FRAME_RELATED_P (insn) = 1;
10272 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10273
10274 if (m->fs.cfa_reg == stack_pointer_rtx)
10275 m->fs.cfa_reg = hard_frame_pointer_rtx;
10276 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10277 m->fs.fp_valid = true;
10278 }
10279
10280 if (!int_registers_saved)
10281 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10282 if (frame.nsseregs)
10283 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10284
10285 pic_reg_used = false;
10286 if (pic_offset_table_rtx
10287 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10288 || crtl->profile))
10289 {
10290 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10291
10292 if (alt_pic_reg_used != INVALID_REGNUM)
10293 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10294
10295 pic_reg_used = true;
10296 }
10297
10298 if (pic_reg_used)
10299 {
10300 if (TARGET_64BIT)
10301 {
10302 if (ix86_cmodel == CM_LARGE_PIC)
10303 {
10304 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10305 rtx label = gen_label_rtx ();
10306 emit_label (label);
10307 LABEL_PRESERVE_P (label) = 1;
10308 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10309 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10310 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10311 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10312 pic_offset_table_rtx, tmp_reg));
10313 }
10314 else
10315 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10316 }
10317 else
10318 {
10319 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10320 RTX_FRAME_RELATED_P (insn) = 1;
10321 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10322 }
10323 }
10324
10325 /* In the pic_reg_used case, make sure that the got load isn't deleted
10326 when mcount needs it. Blockage to avoid call movement across mcount
10327 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10328 note. */
10329 if (crtl->profile && !flag_fentry && pic_reg_used)
10330 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10331
10332 if (crtl->drap_reg && !crtl->stack_realign_needed)
10333 {
10334 /* vDRAP is set up, but after reload it turns out stack realignment
10335 isn't necessary; here we emit the prologue to set up DRAP
10336 without the stack realignment adjustment. */
10337 t = choose_baseaddr (0);
10338 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10339 }
10340
10341 /* Prevent instructions from being scheduled into register save push
10342 sequence when access to the redzone area is done through frame pointer.
10343 The offset between the frame pointer and the stack pointer is calculated
10344 relative to the value of the stack pointer at the end of the function
10345 prologue, and moving instructions that access redzone area via frame
10346 pointer inside push sequence violates this assumption. */
10347 if (frame_pointer_needed && frame.red_zone_size)
10348 emit_insn (gen_memory_blockage ());
10349
10350 /* Emit cld instruction if stringops are used in the function. */
10351 if (TARGET_CLD && ix86_current_function_needs_cld)
10352 emit_insn (gen_cld ());
10353
10354 /* SEH requires that the prologue end within 256 bytes of the start of
10355 the function. Prevent instruction schedules that would extend that.
10356 Further, prevent alloca modifications to the stack pointer from being
10357 combined with prologue modifications. */
10358 if (TARGET_SEH)
10359 emit_insn (gen_prologue_use (stack_pointer_rtx));
10360 }
10361
10362 /* Emit code to restore REG using a POP insn. */
10363
10364 static void
10365 ix86_emit_restore_reg_using_pop (rtx reg)
10366 {
10367 struct machine_function *m = cfun->machine;
10368 rtx insn = emit_insn (gen_pop (reg));
10369
10370 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10371 m->fs.sp_offset -= UNITS_PER_WORD;
10372
10373 if (m->fs.cfa_reg == crtl->drap_reg
10374 && REGNO (reg) == REGNO (crtl->drap_reg))
10375 {
10376 /* Previously we'd represented the CFA as an expression
10377 like *(%ebp - 8). We've just popped that value from
10378 the stack, which means we need to reset the CFA to
10379 the drap register. This will remain until we restore
10380 the stack pointer. */
10381 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10382 RTX_FRAME_RELATED_P (insn) = 1;
10383
10384 /* This means that the DRAP register is valid for addressing too. */
10385 m->fs.drap_valid = true;
10386 return;
10387 }
10388
10389 if (m->fs.cfa_reg == stack_pointer_rtx)
10390 {
10391 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10392 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10393 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10394 RTX_FRAME_RELATED_P (insn) = 1;
10395
10396 m->fs.cfa_offset -= UNITS_PER_WORD;
10397 }
10398
10399 /* When the frame pointer is the CFA, and we pop it, we are
10400 swapping back to the stack pointer as the CFA. This happens
10401 for stack frames that don't allocate other data, so we assume
10402 the stack pointer is now pointing at the return address, i.e.
10403 the function entry state, which makes the offset one word. */
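/* As a concrete illustration (a sketch of the common case described
   above, not the only possibility): on 32-bit, popping %ebp here ends up
   described to the unwinder as the equivalent of ".cfi_def_cfa %esp, 4".  */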
10404 if (reg == hard_frame_pointer_rtx)
10405 {
10406 m->fs.fp_valid = false;
10407 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10408 {
10409 m->fs.cfa_reg = stack_pointer_rtx;
10410 m->fs.cfa_offset -= UNITS_PER_WORD;
10411
10412 add_reg_note (insn, REG_CFA_DEF_CFA,
10413 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10414 GEN_INT (m->fs.cfa_offset)));
10415 RTX_FRAME_RELATED_P (insn) = 1;
10416 }
10417 }
10418 }
10419
10420 /* Emit code to restore saved registers using POP insns. */
10421
10422 static void
10423 ix86_emit_restore_regs_using_pop (void)
10424 {
10425 unsigned int regno;
10426
10427 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10428 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10429 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10430 }
10431
10432 /* Emit code and notes for the LEAVE instruction. */
10433
10434 static void
10435 ix86_emit_leave (void)
10436 {
10437 struct machine_function *m = cfun->machine;
10438 rtx insn = emit_insn (ix86_gen_leave ());
10439
10440 ix86_add_queued_cfa_restore_notes (insn);
10441
10442 gcc_assert (m->fs.fp_valid);
10443 m->fs.sp_valid = true;
10444 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10445 m->fs.fp_valid = false;
10446
10447 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10448 {
10449 m->fs.cfa_reg = stack_pointer_rtx;
10450 m->fs.cfa_offset = m->fs.sp_offset;
10451
10452 add_reg_note (insn, REG_CFA_DEF_CFA,
10453 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10454 RTX_FRAME_RELATED_P (insn) = 1;
10455 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10456 m->fs.fp_offset);
10457 }
10458 }
10459
10460 /* Emit code to restore saved registers using MOV insns.
10461 First register is restored from CFA - CFA_OFFSET. */
10462 static void
10463 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10464 bool maybe_eh_return)
10465 {
10466 struct machine_function *m = cfun->machine;
10467 unsigned int regno;
10468
10469 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10470 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10471 {
10472 rtx reg = gen_rtx_REG (Pmode, regno);
10473 rtx insn, mem;
10474
10475 mem = choose_baseaddr (cfa_offset);
10476 mem = gen_frame_mem (Pmode, mem);
10477 insn = emit_move_insn (reg, mem);
10478
10479 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10480 {
10481 /* Previously we'd represented the CFA as an expression
10482 like *(%ebp - 8). We've just popped that value from
10483 the stack, which means we need to reset the CFA to
10484 the drap register. This will remain until we restore
10485 the stack pointer. */
10486 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10487 RTX_FRAME_RELATED_P (insn) = 1;
10488
10489 /* This means that the DRAP register is valid for addressing. */
10490 m->fs.drap_valid = true;
10491 }
10492 else
10493 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10494
10495 cfa_offset -= UNITS_PER_WORD;
10496 }
10497 }
10498
10499 /* Emit code to restore saved SSE registers using MOV insns.
10500 First register is restored from CFA - CFA_OFFSET. */
10501 static void
10502 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10503 bool maybe_eh_return)
10504 {
10505 unsigned int regno;
10506
10507 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10508 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10509 {
10510 rtx reg = gen_rtx_REG (V4SFmode, regno);
10511 rtx mem;
10512
10513 mem = choose_baseaddr (cfa_offset);
10514 mem = gen_rtx_MEM (V4SFmode, mem);
10515 set_mem_align (mem, 128);
10516 emit_move_insn (reg, mem);
10517
10518 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10519
10520 cfa_offset -= 16;
10521 }
10522 }
10523
10524 /* Restore function stack, frame, and registers. */
10525
10526 void
10527 ix86_expand_epilogue (int style)
10528 {
10529 struct machine_function *m = cfun->machine;
10530 struct machine_frame_state frame_state_save = m->fs;
10531 struct ix86_frame frame;
10532 bool restore_regs_via_mov;
10533 bool using_drap;
10534
10535 ix86_finalize_stack_realign_flags ();
10536 ix86_compute_frame_layout (&frame);
10537
10538 m->fs.sp_valid = (!frame_pointer_needed
10539 || (current_function_sp_is_unchanging
10540 && !stack_realign_fp));
10541 gcc_assert (!m->fs.sp_valid
10542 || m->fs.sp_offset == frame.stack_pointer_offset);
10543
10544 /* The frame pointer must be valid if and only if it is needed. */
10545 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10546 gcc_assert (!m->fs.fp_valid
10547 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10548
10549 /* We must have *some* valid pointer to the stack frame. */
10550 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10551
10552 /* The DRAP is never valid at this point. */
10553 gcc_assert (!m->fs.drap_valid);
10554
10555 /* See the comment about red zone and frame
10556 pointer usage in ix86_expand_prologue. */
10557 if (frame_pointer_needed && frame.red_zone_size)
10558 emit_insn (gen_memory_blockage ());
10559
10560 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10561 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10562
10563 /* Determine the CFA offset of the end of the red-zone. */
10564 m->fs.red_zone_offset = 0;
10565 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10566 {
10567 /* The red-zone begins below the return address. */
10568 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
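/* Illustrative numbers, assuming the usual x86-64 definitions
   (RED_ZONE_SIZE == 128 and UNITS_PER_WORD == 8): the offset just
   computed is 136, i.e. the end of the red zone lies 136 bytes
   below the CFA.  */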
10569
10570 /* When the register save area is in the aligned portion of
10571 the stack, determine the maximum runtime displacement that
10572 matches up with the aligned frame. */
10573 if (stack_realign_drap)
10574 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10575 + UNITS_PER_WORD);
10576 }
10577
10578 /* Special care must be taken for the normal return case of a function
10579 using eh_return: the eax and edx registers are marked as saved, but
10580 not restored along this path. Adjust the save location to match. */
10581 if (crtl->calls_eh_return && style != 2)
10582 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10583
10584 /* EH_RETURN requires the use of moves to function properly. */
10585 if (crtl->calls_eh_return)
10586 restore_regs_via_mov = true;
10587 /* SEH requires the use of pops to identify the epilogue. */
10588 else if (TARGET_SEH)
10589 restore_regs_via_mov = false;
10590 /* If we're only restoring one register and sp is not valid, then
10591 use a move instruction to restore the register, since it's
10592 less work than reloading sp and popping the register. */
10593 else if (!m->fs.sp_valid && frame.nregs <= 1)
10594 restore_regs_via_mov = true;
10595 else if (TARGET_EPILOGUE_USING_MOVE
10596 && cfun->machine->use_fast_prologue_epilogue
10597 && (frame.nregs > 1
10598 || m->fs.sp_offset != frame.reg_save_offset))
10599 restore_regs_via_mov = true;
10600 else if (frame_pointer_needed
10601 && !frame.nregs
10602 && m->fs.sp_offset != frame.reg_save_offset)
10603 restore_regs_via_mov = true;
10604 else if (frame_pointer_needed
10605 && TARGET_USE_LEAVE
10606 && cfun->machine->use_fast_prologue_epilogue
10607 && frame.nregs == 1)
10608 restore_regs_via_mov = true;
10609 else
10610 restore_regs_via_mov = false;
10611
10612 if (restore_regs_via_mov || frame.nsseregs)
10613 {
10614 /* Ensure that the entire register save area is addressable via
10615 the stack pointer, if we will restore via sp. */
10616 if (TARGET_64BIT
10617 && m->fs.sp_offset > 0x7fffffff
10618 && !(m->fs.fp_valid || m->fs.drap_valid)
10619 && (frame.nsseregs + frame.nregs) != 0)
10620 {
10621 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10622 GEN_INT (m->fs.sp_offset
10623 - frame.sse_reg_save_offset),
10624 style,
10625 m->fs.cfa_reg == stack_pointer_rtx);
10626 }
10627 }
10628
10629 /* If there are any SSE registers to restore, then we have to do it
10630 via moves, since there's obviously no pop for SSE regs. */
10631 if (frame.nsseregs)
10632 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10633 style == 2);
10634
10635 if (restore_regs_via_mov)
10636 {
10637 rtx t;
10638
10639 if (frame.nregs)
10640 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10641
10642 /* eh_return epilogues need %ecx added to the stack pointer. */
10643 if (style == 2)
10644 {
10645 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10646
10647 /* Stack align doesn't work with eh_return. */
10648 gcc_assert (!stack_realign_drap);
10649 /* Neither do regparm nested functions. */
10650 gcc_assert (!ix86_static_chain_on_stack);
10651
10652 if (frame_pointer_needed)
10653 {
10654 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10655 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10656 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10657
10658 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10659 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10660
10661 /* Note that we use SA as a temporary CFA, as the return
10662 address is at the proper place relative to it. We
10663 pretend this happens at the FP restore insn because
10664 prior to this insn the FP would be stored at the wrong
10665 offset relative to SA, and after this insn we have no
10666 other reasonable register to use for the CFA. We don't
10667 bother resetting the CFA to the SP for the duration of
10668 the return insn. */
10669 add_reg_note (insn, REG_CFA_DEF_CFA,
10670 plus_constant (sa, UNITS_PER_WORD));
10671 ix86_add_queued_cfa_restore_notes (insn);
10672 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10673 RTX_FRAME_RELATED_P (insn) = 1;
10674
10675 m->fs.cfa_reg = sa;
10676 m->fs.cfa_offset = UNITS_PER_WORD;
10677 m->fs.fp_valid = false;
10678
10679 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10680 const0_rtx, style, false);
10681 }
10682 else
10683 {
10684 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10685 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10686 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10687 ix86_add_queued_cfa_restore_notes (insn);
10688
10689 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10690 if (m->fs.cfa_offset != UNITS_PER_WORD)
10691 {
10692 m->fs.cfa_offset = UNITS_PER_WORD;
10693 add_reg_note (insn, REG_CFA_DEF_CFA,
10694 plus_constant (stack_pointer_rtx,
10695 UNITS_PER_WORD));
10696 RTX_FRAME_RELATED_P (insn) = 1;
10697 }
10698 }
10699 m->fs.sp_offset = UNITS_PER_WORD;
10700 m->fs.sp_valid = true;
10701 }
10702 }
10703 else
10704 {
10705 /* SEH requires that the function end with (1) a stack adjustment
10706 if necessary, (2) a sequence of pops, and (3) a return or
10707 jump instruction. Prevent insns from the function body from
10708 being scheduled into this sequence. */
10709 if (TARGET_SEH)
10710 {
10711 /* Prevent a catch region from being adjacent to the standard
10712 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
10713 several other flags that would be interesting to test are
10714 set up yet. */
10715 if (flag_non_call_exceptions)
10716 emit_insn (gen_nops (const1_rtx));
10717 else
10718 emit_insn (gen_blockage ());
10719 }
10720
10721 /* First step is to deallocate the stack frame so that we can
10722 pop the registers. */
10723 if (!m->fs.sp_valid)
10724 {
10725 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10726 GEN_INT (m->fs.fp_offset
10727 - frame.reg_save_offset),
10728 style, false);
10729 }
10730 else if (m->fs.sp_offset != frame.reg_save_offset)
10731 {
10732 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10733 GEN_INT (m->fs.sp_offset
10734 - frame.reg_save_offset),
10735 style,
10736 m->fs.cfa_reg == stack_pointer_rtx);
10737 }
10738
10739 ix86_emit_restore_regs_using_pop ();
10740 }
10741
10742 /* If we used a frame pointer and haven't already got rid of it,
10743 then do so now. */
10744 if (m->fs.fp_valid)
10745 {
10746 /* If the stack pointer is valid and pointing at the frame
10747 pointer store address, then we only need a pop. */
10748 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10749 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10750 /* Using leave results in shorter dependency chains on CPUs that are
10751 able to grok it fast. */
10752 else if (TARGET_USE_LEAVE
10753 || optimize_function_for_size_p (cfun)
10754 || !cfun->machine->use_fast_prologue_epilogue)
10755 ix86_emit_leave ();
10756 else
10757 {
10758 pro_epilogue_adjust_stack (stack_pointer_rtx,
10759 hard_frame_pointer_rtx,
10760 const0_rtx, style, !using_drap);
10761 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10762 }
10763 }
10764
10765 if (using_drap)
10766 {
10767 int param_ptr_offset = UNITS_PER_WORD;
10768 rtx insn;
10769
10770 gcc_assert (stack_realign_drap);
10771
10772 if (ix86_static_chain_on_stack)
10773 param_ptr_offset += UNITS_PER_WORD;
10774 if (!call_used_regs[REGNO (crtl->drap_reg)])
10775 param_ptr_offset += UNITS_PER_WORD;
10776
10777 insn = emit_insn (gen_rtx_SET
10778 (VOIDmode, stack_pointer_rtx,
10779 gen_rtx_PLUS (Pmode,
10780 crtl->drap_reg,
10781 GEN_INT (-param_ptr_offset))));
10782 m->fs.cfa_reg = stack_pointer_rtx;
10783 m->fs.cfa_offset = param_ptr_offset;
10784 m->fs.sp_offset = param_ptr_offset;
10785 m->fs.realigned = false;
10786
10787 add_reg_note (insn, REG_CFA_DEF_CFA,
10788 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10789 GEN_INT (param_ptr_offset)));
10790 RTX_FRAME_RELATED_P (insn) = 1;
10791
10792 if (!call_used_regs[REGNO (crtl->drap_reg)])
10793 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10794 }
10795
10796 /* At this point the stack pointer must be valid, and we must have
10797 restored all of the registers. We may not have deallocated the
10798 entire stack frame. We've delayed this until now because it may
10799 be possible to merge the local stack deallocation with the
10800 deallocation forced by ix86_static_chain_on_stack. */
10801 gcc_assert (m->fs.sp_valid);
10802 gcc_assert (!m->fs.fp_valid);
10803 gcc_assert (!m->fs.realigned);
10804 if (m->fs.sp_offset != UNITS_PER_WORD)
10805 {
10806 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10807 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10808 style, true);
10809 }
10810 else
10811 ix86_add_queued_cfa_restore_notes (get_last_insn ());
10812
10813 /* Sibcall epilogues don't want a return instruction. */
10814 if (style == 0)
10815 {
10816 m->fs = frame_state_save;
10817 return;
10818 }
10819
10820 /* Emit vzeroupper if needed. */
10821 if (TARGET_VZEROUPPER
10822 && !TREE_THIS_VOLATILE (cfun->decl)
10823 && !cfun->machine->caller_return_avx256_p)
10824 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10825
10826 if (crtl->args.pops_args && crtl->args.size)
10827 {
10828 rtx popc = GEN_INT (crtl->args.pops_args);
10829
10830 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
10831 address, do an explicit add, and jump indirectly to the caller. */
10832
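/* (The 64K limit comes from the "ret imm16" instruction, whose
   pop-count immediate is only 16 bits wide.)  */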
10833 if (crtl->args.pops_args >= 65536)
10834 {
10835 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10836 rtx insn;
10837
10838 /* There is no "pascal" calling convention in any 64bit ABI. */
10839 gcc_assert (!TARGET_64BIT);
10840
10841 insn = emit_insn (gen_pop (ecx));
10842 m->fs.cfa_offset -= UNITS_PER_WORD;
10843 m->fs.sp_offset -= UNITS_PER_WORD;
10844
10845 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10846 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10847 add_reg_note (insn, REG_CFA_REGISTER,
10848 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10849 RTX_FRAME_RELATED_P (insn) = 1;
10850
10851 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10852 popc, -1, true);
10853 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10854 }
10855 else
10856 emit_jump_insn (gen_simple_return_pop_internal (popc));
10857 }
10858 else
10859 emit_jump_insn (gen_simple_return_internal ());
10860
10861 /* Restore the state back to the state from the prologue,
10862 so that it's correct for the next epilogue. */
10863 m->fs = frame_state_save;
10864 }
10865
10866 /* Reset from the function's potential modifications. */
10867
10868 static void
10869 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10870 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10871 {
10872 if (pic_offset_table_rtx)
10873 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10874 #if TARGET_MACHO
10875 /* Mach-O doesn't support labels at the end of objects, so if
10876 it looks like we might want one, insert a NOP. */
10877 {
10878 rtx insn = get_last_insn ();
10879 while (insn
10880 && NOTE_P (insn)
10881 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10882 insn = PREV_INSN (insn);
10883 if (insn
10884 && (LABEL_P (insn)
10885 || (NOTE_P (insn)
10886 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10887 fputs ("\tnop\n", file);
10888 }
10889 #endif
10890
10891 }
10892
10893 /* Return a scratch register to use in the split stack prologue. The
10894 split stack prologue is used for -fsplit-stack. It is the first
10895 instructions in the function, even before the regular prologue.
10896 The scratch register can be any caller-saved register which is not
10897 used for parameters or for the static chain. */
10898
10899 static unsigned int
10900 split_stack_prologue_scratch_regno (void)
10901 {
10902 if (TARGET_64BIT)
10903 return R11_REG;
10904 else
10905 {
10906 bool is_fastcall;
10907 int regparm;
10908
10909 is_fastcall = (lookup_attribute ("fastcall",
10910 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10911 != NULL);
10912 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10913
10914 if (is_fastcall)
10915 {
10916 if (DECL_STATIC_CHAIN (cfun->decl))
10917 {
10918 sorry ("-fsplit-stack does not support fastcall with "
10919 "nested function");
10920 return INVALID_REGNUM;
10921 }
10922 return AX_REG;
10923 }
10924 else if (regparm < 3)
10925 {
10926 if (!DECL_STATIC_CHAIN (cfun->decl))
10927 return CX_REG;
10928 else
10929 {
10930 if (regparm >= 2)
10931 {
10932 sorry ("-fsplit-stack does not support 2 register "
10933 " parameters for a nested function");
10934 return INVALID_REGNUM;
10935 }
10936 return DX_REG;
10937 }
10938 }
10939 else
10940 {
10941 /* FIXME: We could make this work by pushing a register
10942 around the addition and comparison. */
10943 sorry ("-fsplit-stack does not support 3 register parameters");
10944 return INVALID_REGNUM;
10945 }
10946 }
10947 }
10948
10949 /* A SYMBOL_REF for the function which allocates new stack space for
10950 -fsplit-stack. */
10951
10952 static GTY(()) rtx split_stack_fn;
10953
10954 /* A SYMBOL_REF for the more-stack function when using the large
10955 model. */
10956
10957 static GTY(()) rtx split_stack_fn_large;
10958
10959 /* Handle -fsplit-stack. These are the first instructions in the
10960 function, even before the regular prologue. */
10961
10962 void
10963 ix86_expand_split_stack_prologue (void)
10964 {
10965 struct ix86_frame frame;
10966 HOST_WIDE_INT allocate;
10967 unsigned HOST_WIDE_INT args_size;
10968 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10969 rtx scratch_reg = NULL_RTX;
10970 rtx varargs_label = NULL_RTX;
10971 rtx fn;
10972
10973 gcc_assert (flag_split_stack && reload_completed);
10974
10975 ix86_finalize_stack_realign_flags ();
10976 ix86_compute_frame_layout (&frame);
10977 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10978
10979 /* This is the label we will branch to if we have enough stack
10980 space. We expect the basic block reordering pass to reverse this
10981 branch if optimizing, so that we branch in the unlikely case. */
10982 label = gen_label_rtx ();
10983
10984 /* We need to compare the stack pointer minus the frame size with
10985 the stack boundary in the TCB. The stack boundary always gives
10986 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10987 can compare directly. Otherwise we need to do an addition. */
10988
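/* Roughly, as pseudo-code (a sketch of the split-stack check this
   function emits, not the exact insns):

     if (sp - allocate >= stack_boundary_in_TCB)
       goto label;           -- enough stack; run the normal prologue
     call __morestack;       -- otherwise switch to a new stack segment  */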
10989 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10990 UNSPEC_STACK_CHECK);
10991 limit = gen_rtx_CONST (Pmode, limit);
10992 limit = gen_rtx_MEM (Pmode, limit);
10993 if (allocate < SPLIT_STACK_AVAILABLE)
10994 current = stack_pointer_rtx;
10995 else
10996 {
10997 unsigned int scratch_regno;
10998 rtx offset;
10999
11000 /* We need a scratch register to hold the stack pointer minus
11001 the required frame size. Since this is the very start of the
11002 function, the scratch register can be any caller-saved
11003 register which is not used for parameters. */
11004 offset = GEN_INT (- allocate);
11005 scratch_regno = split_stack_prologue_scratch_regno ();
11006 if (scratch_regno == INVALID_REGNUM)
11007 return;
11008 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11009 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11010 {
11011 /* We don't use ix86_gen_add3 in this case because it will
11012 want to split to lea, but when not optimizing the insn
11013 will not be split after this point. */
11014 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11015 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11016 offset)));
11017 }
11018 else
11019 {
11020 emit_move_insn (scratch_reg, offset);
11021 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11022 stack_pointer_rtx));
11023 }
11024 current = scratch_reg;
11025 }
11026
11027 ix86_expand_branch (GEU, current, limit, label);
11028 jump_insn = get_last_insn ();
11029 JUMP_LABEL (jump_insn) = label;
11030
11031 /* Mark the jump as very likely to be taken. */
11032 add_reg_note (jump_insn, REG_BR_PROB,
11033 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
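/* (REG_BR_PROB_BASE is 10000, so the probability recorded above is 9900,
   i.e. the branch to LABEL is predicted taken about 99% of the time.)  */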
11034
11035 if (split_stack_fn == NULL_RTX)
11036 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11037 fn = split_stack_fn;
11038
11039 /* Get more stack space. We pass in the desired stack space and the
11040 size of the arguments to copy to the new stack. In 32-bit mode
11041 we push the parameters; __morestack will return on a new stack
11042 anyhow. In 64-bit mode we pass the parameters in r10 and
11043 r11. */
11044 allocate_rtx = GEN_INT (allocate);
11045 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11046 call_fusage = NULL_RTX;
11047 if (TARGET_64BIT)
11048 {
11049 rtx reg10, reg11;
11050
11051 reg10 = gen_rtx_REG (Pmode, R10_REG);
11052 reg11 = gen_rtx_REG (Pmode, R11_REG);
11053
11054 /* If this function uses a static chain, it will be in %r10.
11055 Preserve it across the call to __morestack. */
11056 if (DECL_STATIC_CHAIN (cfun->decl))
11057 {
11058 rtx rax;
11059
11060 rax = gen_rtx_REG (Pmode, AX_REG);
11061 emit_move_insn (rax, reg10);
11062 use_reg (&call_fusage, rax);
11063 }
11064
11065 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11066 {
11067 HOST_WIDE_INT argval;
11068
11069 /* When using the large model we need to load the address
11070 into a register, and we've run out of registers. So we
11071 switch to a different calling convention, and we call a
11072 different function: __morestack_large_model. We pass the
11073 argument size in the upper 32 bits of r10 and pass the
11074 frame size in the lower 32 bits. */
11075 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11076 gcc_assert ((args_size & 0xffffffff) == args_size);
11077
11078 if (split_stack_fn_large == NULL_RTX)
11079 split_stack_fn_large =
11080 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11081
11082 if (ix86_cmodel == CM_LARGE_PIC)
11083 {
11084 rtx label, x;
11085
11086 label = gen_label_rtx ();
11087 emit_label (label);
11088 LABEL_PRESERVE_P (label) = 1;
11089 emit_insn (gen_set_rip_rex64 (reg10, label));
11090 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11091 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11092 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11093 UNSPEC_GOT);
11094 x = gen_rtx_CONST (Pmode, x);
11095 emit_move_insn (reg11, x);
11096 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11097 x = gen_const_mem (Pmode, x);
11098 emit_move_insn (reg11, x);
11099 }
11100 else
11101 emit_move_insn (reg11, split_stack_fn_large);
11102
11103 fn = reg11;
11104
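/* Pack both values into a single 64-bit immediate.  Purely illustrative
   numbers: args_size == 0x18 and allocate == 0x2000 give
   argval == 0x0000001800002000.  */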
11105 argval = ((args_size << 16) << 16) + allocate;
11106 emit_move_insn (reg10, GEN_INT (argval));
11107 }
11108 else
11109 {
11110 emit_move_insn (reg10, allocate_rtx);
11111 emit_move_insn (reg11, GEN_INT (args_size));
11112 use_reg (&call_fusage, reg11);
11113 }
11114
11115 use_reg (&call_fusage, reg10);
11116 }
11117 else
11118 {
11119 emit_insn (gen_push (GEN_INT (args_size)));
11120 emit_insn (gen_push (allocate_rtx));
11121 }
11122 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11123 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11124 NULL_RTX, false);
11125 add_function_usage_to (call_insn, call_fusage);
11126
11127 /* In order to make call/return prediction work right, we now need
11128 to execute a return instruction. See
11129 libgcc/config/i386/morestack.S for the details on how this works.
11130
11131 For flow purposes gcc must not see this as a return
11132 instruction--we need control flow to continue at the subsequent
11133 label. Therefore, we use an unspec. */
11134 gcc_assert (crtl->args.pops_args < 65536);
11135 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11136
11137 /* If we are in 64-bit mode and this function uses a static chain,
11138 we saved %r10 in %rax before calling __morestack. */
11139 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11140 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11141 gen_rtx_REG (Pmode, AX_REG));
11142
11143 /* If this function calls va_start, we need to store a pointer to
11144 the arguments on the old stack, because they may not have been
11145 all copied to the new stack. At this point the old stack can be
11146 found at the frame pointer value used by __morestack, because
11147 __morestack has set that up before calling back to us. Here we
11148 store that pointer in a scratch register, and in
11149 ix86_expand_prologue we store the scratch register in a stack
11150 slot. */
11151 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11152 {
11153 unsigned int scratch_regno;
11154 rtx frame_reg;
11155 int words;
11156
11157 scratch_regno = split_stack_prologue_scratch_regno ();
11158 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11159 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11160
11161 /* 64-bit:
11162 fp -> old fp value
11163 return address within this function
11164 return address of caller of this function
11165 stack arguments
11166 So we add three words to get to the stack arguments.
11167
11168 32-bit:
11169 fp -> old fp value
11170 return address within this function
11171 first argument to __morestack
11172 second argument to __morestack
11173 return address of caller of this function
11174 stack arguments
11175 So we add five words to get to the stack arguments.
11176 */
11177 words = TARGET_64BIT ? 3 : 5;
11178 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11179 gen_rtx_PLUS (Pmode, frame_reg,
11180 GEN_INT (words * UNITS_PER_WORD))));
11181
11182 varargs_label = gen_label_rtx ();
11183 emit_jump_insn (gen_jump (varargs_label));
11184 JUMP_LABEL (get_last_insn ()) = varargs_label;
11185
11186 emit_barrier ();
11187 }
11188
11189 emit_label (label);
11190 LABEL_NUSES (label) = 1;
11191
11192 /* If this function calls va_start, we now have to set the scratch
11193 register for the case where we do not call __morestack. In this
11194 case we need to set it based on the stack pointer. */
11195 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11196 {
11197 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11198 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11199 GEN_INT (UNITS_PER_WORD))));
11200
11201 emit_label (varargs_label);
11202 LABEL_NUSES (varargs_label) = 1;
11203 }
11204 }
11205
11206 /* We may have to tell the dataflow pass that the split stack prologue
11207 is initializing a scratch register. */
11208
11209 static void
11210 ix86_live_on_entry (bitmap regs)
11211 {
11212 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11213 {
11214 gcc_assert (flag_split_stack);
11215 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11216 }
11217 }
11218 \f
11219 /* Determine if OP is a suitable SUBREG RTX for use in an address. */
11220
11221 static bool
11222 ix86_address_subreg_operand (rtx op)
11223 {
11224 enum machine_mode mode;
11225
11226 if (!REG_P (op))
11227 return false;
11228
11229 mode = GET_MODE (op);
11230
11231 if (GET_MODE_CLASS (mode) != MODE_INT)
11232 return false;
11233
11234 /* Don't allow SUBREGs that span more than a word. They can lead to spill
11235 failures when the register is one word out of a two word structure. */
11236 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11237 return false;
11238
11239 /* Allow only SUBREGs of non-eliminable hard registers. */
11240 return register_no_elim_operand (op, mode);
11241 }
11242
11243 /* Extract the parts of an RTL expression that is a valid memory address
11244 for an instruction. Return 0 if the structure of the address is
11245 grossly off. Return -1 if the address contains ASHIFT, so it is not
11246 strictly valid, but is still used for computing the length of a lea instruction. */
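/* For illustration, one possible input (not the only form handled): the
   address

     (plus:SI (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
                       (reg:SI %ebx))
              (const_int 12))

   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12,
   i.e. the operand written as 12(%ebx,%eax,4) in AT&T syntax.  */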
11247
11248 int
11249 ix86_decompose_address (rtx addr, struct ix86_address *out)
11250 {
11251 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11252 rtx base_reg, index_reg;
11253 HOST_WIDE_INT scale = 1;
11254 rtx scale_rtx = NULL_RTX;
11255 rtx tmp;
11256 int retval = 1;
11257 enum ix86_address_seg seg = SEG_DEFAULT;
11258
11259 /* Allow zero-extended SImode addresses;
11260 they will be emitted with the addr32 prefix. */
11261 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11262 {
11263 if (GET_CODE (addr) == ZERO_EXTEND
11264 && GET_MODE (XEXP (addr, 0)) == SImode)
11265 addr = XEXP (addr, 0);
11266 else if (GET_CODE (addr) == AND
11267 && const_32bit_mask (XEXP (addr, 1), DImode))
11268 {
11269 addr = XEXP (addr, 0);
11270
11271 /* Strip subreg. */
11272 if (GET_CODE (addr) == SUBREG
11273 && GET_MODE (SUBREG_REG (addr)) == SImode)
11274 addr = SUBREG_REG (addr);
11275 }
11276 }
11277
11278 if (REG_P (addr))
11279 base = addr;
11280 else if (GET_CODE (addr) == SUBREG)
11281 {
11282 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11283 base = addr;
11284 else
11285 return 0;
11286 }
11287 else if (GET_CODE (addr) == PLUS)
11288 {
11289 rtx addends[4], op;
11290 int n = 0, i;
11291
11292 op = addr;
11293 do
11294 {
11295 if (n >= 4)
11296 return 0;
11297 addends[n++] = XEXP (op, 1);
11298 op = XEXP (op, 0);
11299 }
11300 while (GET_CODE (op) == PLUS);
11301 if (n >= 4)
11302 return 0;
11303 addends[n] = op;
11304
11305 for (i = n; i >= 0; --i)
11306 {
11307 op = addends[i];
11308 switch (GET_CODE (op))
11309 {
11310 case MULT:
11311 if (index)
11312 return 0;
11313 index = XEXP (op, 0);
11314 scale_rtx = XEXP (op, 1);
11315 break;
11316
11317 case ASHIFT:
11318 if (index)
11319 return 0;
11320 index = XEXP (op, 0);
11321 tmp = XEXP (op, 1);
11322 if (!CONST_INT_P (tmp))
11323 return 0;
11324 scale = INTVAL (tmp);
11325 if ((unsigned HOST_WIDE_INT) scale > 3)
11326 return 0;
11327 scale = 1 << scale;
11328 break;
11329
11330 case UNSPEC:
11331 if (XINT (op, 1) == UNSPEC_TP
11332 && TARGET_TLS_DIRECT_SEG_REFS
11333 && seg == SEG_DEFAULT)
11334 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11335 else
11336 return 0;
11337 break;
11338
11339 case SUBREG:
11340 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11341 return 0;
11342 /* FALLTHRU */
11343
11344 case REG:
11345 if (!base)
11346 base = op;
11347 else if (!index)
11348 index = op;
11349 else
11350 return 0;
11351 break;
11352
11353 case CONST:
11354 case CONST_INT:
11355 case SYMBOL_REF:
11356 case LABEL_REF:
11357 if (disp)
11358 return 0;
11359 disp = op;
11360 break;
11361
11362 default:
11363 return 0;
11364 }
11365 }
11366 }
11367 else if (GET_CODE (addr) == MULT)
11368 {
11369 index = XEXP (addr, 0); /* index*scale */
11370 scale_rtx = XEXP (addr, 1);
11371 }
11372 else if (GET_CODE (addr) == ASHIFT)
11373 {
11374 /* We're called for lea too, which implements ashift on occasion. */
11375 index = XEXP (addr, 0);
11376 tmp = XEXP (addr, 1);
11377 if (!CONST_INT_P (tmp))
11378 return 0;
11379 scale = INTVAL (tmp);
11380 if ((unsigned HOST_WIDE_INT) scale > 3)
11381 return 0;
11382 scale = 1 << scale;
11383 retval = -1;
11384 }
11385 else
11386 disp = addr; /* displacement */
11387
11388 if (index)
11389 {
11390 if (REG_P (index))
11391 ;
11392 else if (GET_CODE (index) == SUBREG
11393 && ix86_address_subreg_operand (SUBREG_REG (index)))
11394 ;
11395 else
11396 return 0;
11397 }
11398
11399 /* Extract the integral value of scale. */
11400 if (scale_rtx)
11401 {
11402 if (!CONST_INT_P (scale_rtx))
11403 return 0;
11404 scale = INTVAL (scale_rtx);
11405 }
11406
11407 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11408 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11409
11410 /* Avoid useless 0 displacement. */
11411 if (disp == const0_rtx && (base || index))
11412 disp = NULL_RTX;
11413
11414 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11415 if (base_reg && index_reg && scale == 1
11416 && (index_reg == arg_pointer_rtx
11417 || index_reg == frame_pointer_rtx
11418 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11419 {
11420 rtx tmp;
11421 tmp = base, base = index, index = tmp;
11422 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11423 }
11424
11425 /* Special case: %ebp cannot be encoded as a base without a displacement.
11426 Similarly %r13. */
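/* (In the ModRM/SIB encoding, "EBP or R13 as base with no displacement"
   is re-used to mean disp32 without a base -- or RIP-relative addressing
   in 64-bit mode -- so a zero displacement byte must be emitted instead.)  */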
11427 if (!disp
11428 && base_reg
11429 && (base_reg == hard_frame_pointer_rtx
11430 || base_reg == frame_pointer_rtx
11431 || base_reg == arg_pointer_rtx
11432 || (REG_P (base_reg)
11433 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11434 || REGNO (base_reg) == R13_REG))))
11435 disp = const0_rtx;
11436
11437 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11438 Avoid this by transforming to [%esi+0].
11439 Reload calls address legitimization without cfun defined, so we need
11440 to test cfun for being non-NULL. */
11441 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11442 && base_reg && !index_reg && !disp
11443 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11444 disp = const0_rtx;
11445
11446 /* Special case: encode reg+reg instead of reg*2. */
11447 if (!base && index && scale == 2)
11448 base = index, base_reg = index_reg, scale = 1;
11449
11450 /* Special case: scaling cannot be encoded without base or displacement. */
11451 if (!base && !disp && index && scale != 1)
11452 disp = const0_rtx;
11453
11454 out->base = base;
11455 out->index = index;
11456 out->disp = disp;
11457 out->scale = scale;
11458 out->seg = seg;
11459
11460 return retval;
11461 }
11462 \f
11463 /* Return cost of the memory address x.
11464 For i386, it is better to use a complex address than let gcc copy
11465 the address into a reg and make a new pseudo. But not if the address
11466 requires two regs - that would mean more pseudos with longer
11467 lifetimes. */
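/* For example, under the heuristic below an address whose base and index
   are two distinct pseudo registers ends up with cost 3, while an address
   using a single hard register costs only 1.  */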
11468 static int
11469 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11470 {
11471 struct ix86_address parts;
11472 int cost = 1;
11473 int ok = ix86_decompose_address (x, &parts);
11474
11475 gcc_assert (ok);
11476
11477 if (parts.base && GET_CODE (parts.base) == SUBREG)
11478 parts.base = SUBREG_REG (parts.base);
11479 if (parts.index && GET_CODE (parts.index) == SUBREG)
11480 parts.index = SUBREG_REG (parts.index);
11481
11482 /* Attempt to minimize number of registers in the address. */
11483 if ((parts.base
11484 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11485 || (parts.index
11486 && (!REG_P (parts.index)
11487 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11488 cost++;
11489
11490 if (parts.base
11491 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11492 && parts.index
11493 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11494 && parts.base != parts.index)
11495 cost++;
11496
11497 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11498 since its predecode logic can't detect the length of such instructions
11499 and they degenerate to vector decoding. Increase the cost of such
11500 addresses here. The penalty is at least 2 cycles. It may be worthwhile
11501 to split such addresses or even refuse them entirely.
11502 
11503 The following addressing modes are affected:
11504 [base+scale*index]
11505 [scale*index+disp]
11506 [base+index]
11507 
11508 The first and last cases may be avoidable by explicitly coding the zero into
11509 the memory address, but I don't have an AMD-K6 machine handy to check this
11510 theory. */
11511
11512 if (TARGET_K6
11513 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11514 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11515 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11516 cost += 10;
11517
11518 return cost;
11519 }
11520 \f
11521 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11522 this is used to form addresses of local data when -fPIC is in
11523 use. */
11524
11525 static bool
11526 darwin_local_data_pic (rtx disp)
11527 {
11528 return (GET_CODE (disp) == UNSPEC
11529 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11530 }
11531
11532 /* Determine if a given RTX is a valid constant. We already know this
11533 satisfies CONSTANT_P. */
11534
11535 static bool
11536 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11537 {
11538 switch (GET_CODE (x))
11539 {
11540 case CONST:
11541 x = XEXP (x, 0);
11542
11543 if (GET_CODE (x) == PLUS)
11544 {
11545 if (!CONST_INT_P (XEXP (x, 1)))
11546 return false;
11547 x = XEXP (x, 0);
11548 }
11549
11550 if (TARGET_MACHO && darwin_local_data_pic (x))
11551 return true;
11552
11553 /* Only some unspecs are valid as "constants". */
11554 if (GET_CODE (x) == UNSPEC)
11555 switch (XINT (x, 1))
11556 {
11557 case UNSPEC_GOT:
11558 case UNSPEC_GOTOFF:
11559 case UNSPEC_PLTOFF:
11560 return TARGET_64BIT;
11561 case UNSPEC_TPOFF:
11562 case UNSPEC_NTPOFF:
11563 x = XVECEXP (x, 0, 0);
11564 return (GET_CODE (x) == SYMBOL_REF
11565 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11566 case UNSPEC_DTPOFF:
11567 x = XVECEXP (x, 0, 0);
11568 return (GET_CODE (x) == SYMBOL_REF
11569 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11570 default:
11571 return false;
11572 }
11573
11574 /* We must have drilled down to a symbol. */
11575 if (GET_CODE (x) == LABEL_REF)
11576 return true;
11577 if (GET_CODE (x) != SYMBOL_REF)
11578 return false;
11579 /* FALLTHRU */
11580
11581 case SYMBOL_REF:
11582 /* TLS symbols are never valid. */
11583 if (SYMBOL_REF_TLS_MODEL (x))
11584 return false;
11585
11586 /* DLLIMPORT symbols are never valid. */
11587 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11588 && SYMBOL_REF_DLLIMPORT_P (x))
11589 return false;
11590
11591 #if TARGET_MACHO
11592 /* mdynamic-no-pic */
11593 if (MACHO_DYNAMIC_NO_PIC_P)
11594 return machopic_symbol_defined_p (x);
11595 #endif
11596 break;
11597
11598 case CONST_DOUBLE:
11599 if (GET_MODE (x) == TImode
11600 && x != CONST0_RTX (TImode)
11601 && !TARGET_64BIT)
11602 return false;
11603 break;
11604
11605 case CONST_VECTOR:
11606 if (!standard_sse_constant_p (x))
11607 return false;
11608
11609 default:
11610 break;
11611 }
11612
11613 /* Otherwise we handle everything else in the move patterns. */
11614 return true;
11615 }
11616
11617 /* Determine if it's legal to put X into the constant pool. This
11618 is not possible for the address of thread-local symbols, which
11619 is checked above. */
11620
11621 static bool
11622 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11623 {
11624 /* We can always put integral constants and vectors in memory. */
11625 switch (GET_CODE (x))
11626 {
11627 case CONST_INT:
11628 case CONST_DOUBLE:
11629 case CONST_VECTOR:
11630 return false;
11631
11632 default:
11633 break;
11634 }
11635 return !ix86_legitimate_constant_p (mode, x);
11636 }
11637
11638
11639 /* Nonzero if the constant value X is a legitimate general operand
11640 when generating PIC code. It is given that flag_pic is on and
11641 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11642
11643 bool
11644 legitimate_pic_operand_p (rtx x)
11645 {
11646 rtx inner;
11647
11648 switch (GET_CODE (x))
11649 {
11650 case CONST:
11651 inner = XEXP (x, 0);
11652 if (GET_CODE (inner) == PLUS
11653 && CONST_INT_P (XEXP (inner, 1)))
11654 inner = XEXP (inner, 0);
11655
11656 /* Only some unspecs are valid as "constants". */
11657 if (GET_CODE (inner) == UNSPEC)
11658 switch (XINT (inner, 1))
11659 {
11660 case UNSPEC_GOT:
11661 case UNSPEC_GOTOFF:
11662 case UNSPEC_PLTOFF:
11663 return TARGET_64BIT;
11664 case UNSPEC_TPOFF:
11665 x = XVECEXP (inner, 0, 0);
11666 return (GET_CODE (x) == SYMBOL_REF
11667 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11668 case UNSPEC_MACHOPIC_OFFSET:
11669 return legitimate_pic_address_disp_p (x);
11670 default:
11671 return false;
11672 }
11673 /* FALLTHRU */
11674
11675 case SYMBOL_REF:
11676 case LABEL_REF:
11677 return legitimate_pic_address_disp_p (x);
11678
11679 default:
11680 return true;
11681 }
11682 }
11683
11684 /* Determine if a given CONST RTX is a valid memory displacement
11685 in PIC mode. */
11686
11687 bool
11688 legitimate_pic_address_disp_p (rtx disp)
11689 {
11690 bool saw_plus;
11691
11692 /* In 64bit mode we can allow direct addresses of symbols and labels
11693 when they are not dynamic symbols. */
11694 if (TARGET_64BIT)
11695 {
11696 rtx op0 = disp, op1;
11697
11698 switch (GET_CODE (disp))
11699 {
11700 case LABEL_REF:
11701 return true;
11702
11703 case CONST:
11704 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11705 break;
11706 op0 = XEXP (XEXP (disp, 0), 0);
11707 op1 = XEXP (XEXP (disp, 0), 1);
11708 if (!CONST_INT_P (op1)
11709 || INTVAL (op1) >= 16*1024*1024
11710 || INTVAL (op1) < -16*1024*1024)
11711 break;
11712 if (GET_CODE (op0) == LABEL_REF)
11713 return true;
11714 if (GET_CODE (op0) != SYMBOL_REF)
11715 break;
11716 /* FALLTHRU */
11717
11718 case SYMBOL_REF:
11719 /* TLS references should always be enclosed in UNSPEC. */
11720 if (SYMBOL_REF_TLS_MODEL (op0))
11721 return false;
11722 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11723 && ix86_cmodel != CM_LARGE_PIC)
11724 return true;
11725 break;
11726
11727 default:
11728 break;
11729 }
11730 }
11731 if (GET_CODE (disp) != CONST)
11732 return false;
11733 disp = XEXP (disp, 0);
11734
11735 if (TARGET_64BIT)
11736 {
11737 /* It is unsafe to allow PLUS expressions here; this limits the allowed
11738 distance into GOT tables. We should not need these anyway. */
11739 if (GET_CODE (disp) != UNSPEC
11740 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11741 && XINT (disp, 1) != UNSPEC_GOTOFF
11742 && XINT (disp, 1) != UNSPEC_PCREL
11743 && XINT (disp, 1) != UNSPEC_PLTOFF))
11744 return false;
11745
11746 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11747 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11748 return false;
11749 return true;
11750 }
11751
11752 saw_plus = false;
11753 if (GET_CODE (disp) == PLUS)
11754 {
11755 if (!CONST_INT_P (XEXP (disp, 1)))
11756 return false;
11757 disp = XEXP (disp, 0);
11758 saw_plus = true;
11759 }
11760
11761 if (TARGET_MACHO && darwin_local_data_pic (disp))
11762 return true;
11763
11764 if (GET_CODE (disp) != UNSPEC)
11765 return false;
11766
11767 switch (XINT (disp, 1))
11768 {
11769 case UNSPEC_GOT:
11770 if (saw_plus)
11771 return false;
11772 /* We need to check for both symbols and labels because VxWorks loads
11773 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11774 details. */
11775 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11776 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11777 case UNSPEC_GOTOFF:
11778 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11779 While the ABI also specifies a 32bit relocation, we don't produce it in
11780 the small PIC model at all. */
11781 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11782 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11783 && !TARGET_64BIT)
11784 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11785 return false;
11786 case UNSPEC_GOTTPOFF:
11787 case UNSPEC_GOTNTPOFF:
11788 case UNSPEC_INDNTPOFF:
11789 if (saw_plus)
11790 return false;
11791 disp = XVECEXP (disp, 0, 0);
11792 return (GET_CODE (disp) == SYMBOL_REF
11793 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11794 case UNSPEC_NTPOFF:
11795 disp = XVECEXP (disp, 0, 0);
11796 return (GET_CODE (disp) == SYMBOL_REF
11797 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11798 case UNSPEC_DTPOFF:
11799 disp = XVECEXP (disp, 0, 0);
11800 return (GET_CODE (disp) == SYMBOL_REF
11801 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11802 }
11803
11804 return false;
11805 }
11806
11807 /* Recognizes RTL expressions that are valid memory addresses for an
11808 instruction. The MODE argument is the machine mode for the MEM
11809 expression that wants to use this address.
11810
11811 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11812 convert common non-canonical forms to canonical form so that they will
11813 be recognized. */
11814
11815 static bool
11816 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11817 rtx addr, bool strict)
11818 {
11819 struct ix86_address parts;
11820 rtx base, index, disp;
11821 HOST_WIDE_INT scale;
11822
11823 if (ix86_decompose_address (addr, &parts) <= 0)
11824 /* Decomposition failed. */
11825 return false;
11826
11827 base = parts.base;
11828 index = parts.index;
11829 disp = parts.disp;
11830 scale = parts.scale;
11831
11832 /* Validate base register. */
11833 if (base)
11834 {
11835 rtx reg;
11836
11837 if (REG_P (base))
11838 reg = base;
11839 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
11840 reg = SUBREG_REG (base);
11841 else
11842 /* Base is not a register. */
11843 return false;
11844
11845 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
11846 return false;
11847
11848 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11849 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11850 /* Base is not valid. */
11851 return false;
11852 }
11853
11854 /* Validate index register. */
11855 if (index)
11856 {
11857 rtx reg;
11858
11859 if (REG_P (index))
11860 reg = index;
11861 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
11862 reg = SUBREG_REG (index);
11863 else
11864 /* Index is not a register. */
11865 return false;
11866
11867 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
11868 return false;
11869
11870 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11871 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11872 /* Index is not valid. */
11873 return false;
11874 }
11875
11876 /* Index and base should have the same mode. */
11877 if (base && index
11878 && GET_MODE (base) != GET_MODE (index))
11879 return false;
11880
11881 /* Validate scale factor. */
11882 if (scale != 1)
11883 {
11884 if (!index)
11885 /* Scale without index. */
11886 return false;
11887
11888 if (scale != 2 && scale != 4 && scale != 8)
11889 /* Scale is not a valid multiplier. */
11890 return false;
11891 }
11892
11893 /* Validate displacement. */
11894 if (disp)
11895 {
11896 if (GET_CODE (disp) == CONST
11897 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11898 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11899 switch (XINT (XEXP (disp, 0), 1))
11900 {
11901 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11902 used. While the ABI also specifies 32bit relocations, we don't produce
11903 them at all and use IP-relative addressing instead. */
11904 case UNSPEC_GOT:
11905 case UNSPEC_GOTOFF:
11906 gcc_assert (flag_pic);
11907 if (!TARGET_64BIT)
11908 goto is_legitimate_pic;
11909
11910 /* 64bit address unspec. */
11911 return false;
11912
11913 case UNSPEC_GOTPCREL:
11914 case UNSPEC_PCREL:
11915 gcc_assert (flag_pic);
11916 goto is_legitimate_pic;
11917
11918 case UNSPEC_GOTTPOFF:
11919 case UNSPEC_GOTNTPOFF:
11920 case UNSPEC_INDNTPOFF:
11921 case UNSPEC_NTPOFF:
11922 case UNSPEC_DTPOFF:
11923 break;
11924
11925 case UNSPEC_STACK_CHECK:
11926 gcc_assert (flag_split_stack);
11927 break;
11928
11929 default:
11930 /* Invalid address unspec. */
11931 return false;
11932 }
11933
11934 else if (SYMBOLIC_CONST (disp)
11935 && (flag_pic
11936 || (TARGET_MACHO
11937 #if TARGET_MACHO
11938 && MACHOPIC_INDIRECT
11939 && !machopic_operand_p (disp)
11940 #endif
11941 )))
11942 {
11943
11944 is_legitimate_pic:
11945 if (TARGET_64BIT && (index || base))
11946 {
11947 /* foo@dtpoff(%rX) is ok. */
11948 if (GET_CODE (disp) != CONST
11949 || GET_CODE (XEXP (disp, 0)) != PLUS
11950 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11951 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11952 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11953 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11954 /* Non-constant pic memory reference. */
11955 return false;
11956 }
11957 else if ((!TARGET_MACHO || flag_pic)
11958 && ! legitimate_pic_address_disp_p (disp))
11959 /* Displacement is an invalid pic construct. */
11960 return false;
11961 #if TARGET_MACHO
11962 else if (MACHO_DYNAMIC_NO_PIC_P
11963 && !ix86_legitimate_constant_p (Pmode, disp))
11964 /* Displacement must be referenced via non_lazy_pointer. */
11965 return false;
11966 #endif
11967
11968 /* This code used to verify that a symbolic pic displacement
11969 includes the pic_offset_table_rtx register.
11970
11971 While this is a good idea, unfortunately these constructs may
11972 be created by the "adds using lea" optimization for incorrect
11973 code like:
11974
11975 int a;
11976 int foo(int i)
11977 {
11978 return *(&a+i);
11979 }
11980
11981 This code is nonsensical, but it results in addressing the
11982 GOT table with a pic_offset_table_rtx base. We can't
11983 easily refuse it, since it gets matched by the
11984 "addsi3" pattern, which later gets split to lea when the
11985 output register differs from the input. While this
11986 could be handled by a separate addsi pattern for this case
11987 that never results in lea, disabling this test seems to be
11988 the easier and correct fix for the crash. */
11989 }
11990 else if (GET_CODE (disp) != LABEL_REF
11991 && !CONST_INT_P (disp)
11992 && (GET_CODE (disp) != CONST
11993 || !ix86_legitimate_constant_p (Pmode, disp))
11994 && (GET_CODE (disp) != SYMBOL_REF
11995 || !ix86_legitimate_constant_p (Pmode, disp)))
11996 /* Displacement is not constant. */
11997 return false;
11998 else if (TARGET_64BIT
11999 && !x86_64_immediate_operand (disp, VOIDmode))
12000 /* Displacement is out of range. */
12001 return false;
12002 }
12003
12004 /* Everything looks valid. */
12005 return true;
12006 }
12007
12008 /* Determine if a given RTX is a valid constant address. */
12009
12010 bool
12011 constant_address_p (rtx x)
12012 {
12013 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12014 }
12015 \f
12016 /* Return a unique alias set for the GOT. */
12017
12018 static alias_set_type
12019 ix86_GOT_alias_set (void)
12020 {
12021 static alias_set_type set = -1;
12022 if (set == -1)
12023 set = new_alias_set ();
12024 return set;
12025 }
12026
12027 /* Return a legitimate reference for ORIG (an address) using the
12028 register REG. If REG is 0, a new pseudo is generated.
12029
12030 There are two types of references that must be handled:
12031
12032 1. Global data references must load the address from the GOT, via
12033 the PIC reg. An insn is emitted to do this load, and the reg is
12034 returned.
12035
12036 2. Static data references, constant pool addresses, and code labels
12037 compute the address as an offset from the GOT, whose base is in
12038 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12039 differentiate them from global data objects. The returned
12040 address is the PIC reg + an unspec constant.
12041
12042 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12043 reg also appears in the address. */
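/* As a concrete illustration (32-bit ELF, with "foo" a hypothetical
   symbol name): a reference to a global object becomes a load from
   foo@GOT(%ebx), while a reference to a file-local object becomes the
   address foo@GOTOFF(%ebx), %ebx standing in for the PIC register.  */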
12044
12045 static rtx
12046 legitimize_pic_address (rtx orig, rtx reg)
12047 {
12048 rtx addr = orig;
12049 rtx new_rtx = orig;
12050 rtx base;
12051
12052 #if TARGET_MACHO
12053 if (TARGET_MACHO && !TARGET_64BIT)
12054 {
12055 if (reg == 0)
12056 reg = gen_reg_rtx (Pmode);
12057 /* Use the generic Mach-O PIC machinery. */
12058 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12059 }
12060 #endif
12061
12062 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12063 new_rtx = addr;
12064 else if (TARGET_64BIT
12065 && ix86_cmodel != CM_SMALL_PIC
12066 && gotoff_operand (addr, Pmode))
12067 {
12068 rtx tmpreg;
12069 /* This symbol may be referenced via a displacement from the PIC
12070 base address (@GOTOFF). */
12071
12072 if (reload_in_progress)
12073 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12074 if (GET_CODE (addr) == CONST)
12075 addr = XEXP (addr, 0);
12076 if (GET_CODE (addr) == PLUS)
12077 {
12078 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12079 UNSPEC_GOTOFF);
12080 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12081 }
12082 else
12083 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12084 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12085 if (!reg)
12086 tmpreg = gen_reg_rtx (Pmode);
12087 else
12088 tmpreg = reg;
12089 emit_move_insn (tmpreg, new_rtx);
12090
12091 if (reg != 0)
12092 {
12093 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12094 tmpreg, 1, OPTAB_DIRECT);
12095 new_rtx = reg;
12096 }
12097 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12098 }
12099 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12100 {
12101 /* This symbol may be referenced via a displacement from the PIC
12102 base address (@GOTOFF). */
12103
12104 if (reload_in_progress)
12105 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12106 if (GET_CODE (addr) == CONST)
12107 addr = XEXP (addr, 0);
12108 if (GET_CODE (addr) == PLUS)
12109 {
12110 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12111 UNSPEC_GOTOFF);
12112 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12113 }
12114 else
12115 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12116 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12117 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12118
12119 if (reg != 0)
12120 {
12121 emit_move_insn (reg, new_rtx);
12122 new_rtx = reg;
12123 }
12124 }
12125 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12126 /* We can't use @GOTOFF for text labels on VxWorks;
12127 see gotoff_operand. */
12128 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12129 {
12130 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12131 {
12132 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12133 return legitimize_dllimport_symbol (addr, true);
12134 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12135 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12136 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12137 {
12138 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12139 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12140 }
12141 }
12142
12143 /* For x64 PE-COFF there is no GOT table, so we use the address
12144 directly. */
12145 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12146 {
12147 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12148 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12149
12150 if (reg == 0)
12151 reg = gen_reg_rtx (Pmode);
12152 emit_move_insn (reg, new_rtx);
12153 new_rtx = reg;
12154 }
12155 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12156 {
12157 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12158 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12159 new_rtx = gen_const_mem (Pmode, new_rtx);
12160 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12161
12162 if (reg == 0)
12163 reg = gen_reg_rtx (Pmode);
12164 /* Use gen_movsi directly; otherwise the address is loaded
12165 into a register for CSE. We don't want to CSE these addresses;
12166 instead we CSE the addresses loaded from the GOT table, so skip this. */
12167 emit_insn (gen_movsi (reg, new_rtx));
12168 new_rtx = reg;
12169 }
12170 else
12171 {
12172 /* This symbol must be referenced via a load from the
12173 Global Offset Table (@GOT). */
12174
12175 if (reload_in_progress)
12176 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12177 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12178 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12179 if (TARGET_64BIT)
12180 new_rtx = force_reg (Pmode, new_rtx);
12181 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12182 new_rtx = gen_const_mem (Pmode, new_rtx);
12183 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12184
12185 if (reg == 0)
12186 reg = gen_reg_rtx (Pmode);
12187 emit_move_insn (reg, new_rtx);
12188 new_rtx = reg;
12189 }
12190 }
12191 else
12192 {
12193 if (CONST_INT_P (addr)
12194 && !x86_64_immediate_operand (addr, VOIDmode))
12195 {
12196 if (reg)
12197 {
12198 emit_move_insn (reg, addr);
12199 new_rtx = reg;
12200 }
12201 else
12202 new_rtx = force_reg (Pmode, addr);
12203 }
12204 else if (GET_CODE (addr) == CONST)
12205 {
12206 addr = XEXP (addr, 0);
12207
12208 /* We must match what we generated before. Assume the only
12209 unspecs that can get here are ours; not that we could do
12210 anything with them anyway.... */
12211 if (GET_CODE (addr) == UNSPEC
12212 || (GET_CODE (addr) == PLUS
12213 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12214 return orig;
12215 gcc_assert (GET_CODE (addr) == PLUS);
12216 }
12217 if (GET_CODE (addr) == PLUS)
12218 {
12219 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12220
12221 /* Check first to see if this is a constant offset from a @GOTOFF
12222 symbol reference. */
12223 if (gotoff_operand (op0, Pmode)
12224 && CONST_INT_P (op1))
12225 {
12226 if (!TARGET_64BIT)
12227 {
12228 if (reload_in_progress)
12229 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12230 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12231 UNSPEC_GOTOFF);
12232 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12233 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12234 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12235
12236 if (reg != 0)
12237 {
12238 emit_move_insn (reg, new_rtx);
12239 new_rtx = reg;
12240 }
12241 }
12242 else
12243 {
12244 if (INTVAL (op1) < -16*1024*1024
12245 || INTVAL (op1) >= 16*1024*1024)
12246 {
12247 if (!x86_64_immediate_operand (op1, Pmode))
12248 op1 = force_reg (Pmode, op1);
12249 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12250 }
12251 }
12252 }
12253 else
12254 {
12255 base = legitimize_pic_address (XEXP (addr, 0), reg);
12256 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12257 base == reg ? NULL_RTX : reg);
12258
12259 if (CONST_INT_P (new_rtx))
12260 new_rtx = plus_constant (base, INTVAL (new_rtx));
12261 else
12262 {
12263 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12264 {
12265 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12266 new_rtx = XEXP (new_rtx, 1);
12267 }
12268 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12269 }
12270 }
12271 }
12272 }
12273 return new_rtx;
12274 }
12275 \f
12276 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12277
12278 static rtx
12279 get_thread_pointer (bool to_reg)
12280 {
12281 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12282
12283 if (GET_MODE (tp) != Pmode)
12284 tp = convert_to_mode (Pmode, tp, 1);
12285
12286 if (to_reg)
12287 tp = copy_addr_to_reg (tp);
12288
12289 return tp;
12290 }
12291
12292 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12293
12294 static GTY(()) rtx ix86_tls_symbol;
12295
12296 static rtx
12297 ix86_tls_get_addr (void)
12298 {
12299 if (!ix86_tls_symbol)
12300 {
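/* The 32-bit GNU TLS dialects call the ___tls_get_addr entry point,
   which takes its argument in %eax rather than on the stack.  */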
12301 const char *sym
12302 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12303 ? "___tls_get_addr" : "__tls_get_addr");
12304
12305 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12306 }
12307
12308 return ix86_tls_symbol;
12309 }
12310
12311 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12312
12313 static GTY(()) rtx ix86_tls_module_base_symbol;
12314
12315 rtx
12316 ix86_tls_module_base (void)
12317 {
12318 if (!ix86_tls_module_base_symbol)
12319 {
12320 ix86_tls_module_base_symbol
12321 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12322
12323 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12324 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12325 }
12326
12327 return ix86_tls_module_base_symbol;
12328 }
12329
12330 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12331 false if we expect this to be used for a memory address and true if
12332 we expect to load the address into a register. */
12333
12334 static rtx
12335 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12336 {
12337 rtx dest, base, off;
12338 rtx pic = NULL_RTX, tp = NULL_RTX;
12339 int type;
12340
12341 switch (model)
12342 {
12343 case TLS_MODEL_GLOBAL_DYNAMIC:
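/* Global dynamic: the address is computed at run time by a call to
   __tls_get_addr (or through a TLS descriptor with -mtls-dialect=gnu2).
   On x86-64 this typically expands to something like
   "leaq x@tlsgd(%rip), %rdi; call __tls_get_addr@PLT".  */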
12344 dest = gen_reg_rtx (Pmode);
12345
12346 if (!TARGET_64BIT)
12347 {
12348 if (flag_pic)
12349 pic = pic_offset_table_rtx;
12350 else
12351 {
12352 pic = gen_reg_rtx (Pmode);
12353 emit_insn (gen_set_got (pic));
12354 }
12355 }
12356
12357 if (TARGET_GNU2_TLS)
12358 {
12359 if (TARGET_64BIT)
12360 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12361 else
12362 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12363
12364 tp = get_thread_pointer (true);
12365 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12366
12367 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12368 }
12369 else
12370 {
12371 rtx caddr = ix86_tls_get_addr ();
12372
12373 if (TARGET_64BIT)
12374 {
12375 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12376
12377 start_sequence ();
12378 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12379 insns = get_insns ();
12380 end_sequence ();
12381
12382 RTL_CONST_CALL_P (insns) = 1;
12383 emit_libcall_block (insns, dest, rax, x);
12384 }
12385 else
12386 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12387 }
12388 break;
12389
12390 case TLS_MODEL_LOCAL_DYNAMIC:
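/* Local dynamic: a single __tls_get_addr call (x@tlsld) yields the
   module base; each variable is then addressed through its @dtpoff
   offset from that base.  */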
12391 base = gen_reg_rtx (Pmode);
12392
12393 if (!TARGET_64BIT)
12394 {
12395 if (flag_pic)
12396 pic = pic_offset_table_rtx;
12397 else
12398 {
12399 pic = gen_reg_rtx (Pmode);
12400 emit_insn (gen_set_got (pic));
12401 }
12402 }
12403
12404 if (TARGET_GNU2_TLS)
12405 {
12406 rtx tmp = ix86_tls_module_base ();
12407
12408 if (TARGET_64BIT)
12409 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12410 else
12411 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12412
12413 tp = get_thread_pointer (true);
12414 set_unique_reg_note (get_last_insn (), REG_EQUAL,
12415 gen_rtx_MINUS (Pmode, tmp, tp));
12416 }
12417 else
12418 {
12419 rtx caddr = ix86_tls_get_addr ();
12420
12421 if (TARGET_64BIT)
12422 {
12423 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12424
12425 start_sequence ();
12426 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12427 insns = get_insns ();
12428 end_sequence ();
12429
12430 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12431 share the LD_BASE result with other LD model accesses. */
12432 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12433 UNSPEC_TLS_LD_BASE);
12434
12435 RTL_CONST_CALL_P (insns) = 1;
12436 emit_libcall_block (insns, base, rax, eqv);
12437 }
12438 else
12439 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12440 }
12441
12442 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12443 off = gen_rtx_CONST (Pmode, off);
12444
12445 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12446
12447 if (TARGET_GNU2_TLS)
12448 {
12449 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12450
12451 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12452 }
12453 break;
12454
12455 case TLS_MODEL_INITIAL_EXEC:
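/* Initial exec: the variable's offset from the thread pointer is
   loaded from the GOT (@gottpoff and friends) and combined with the
   thread pointer, e.g. roughly "movq x@gottpoff(%rip), %rax" followed
   by a %fs-relative access on x86-64.  */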
12456 if (TARGET_64BIT)
12457 {
12458 if (TARGET_SUN_TLS)
12459 {
12460 /* The Sun linker took the AMD64 TLS spec literally
12461 and can only handle %rax as the destination of the
12462 initial-exec code sequence. */
12463
12464 dest = gen_reg_rtx (Pmode);
12465 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12466 return dest;
12467 }
12468
12469 pic = NULL;
12470 type = UNSPEC_GOTNTPOFF;
12471 }
12472 else if (flag_pic)
12473 {
12474 if (reload_in_progress)
12475 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12476 pic = pic_offset_table_rtx;
12477 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12478 }
12479 else if (!TARGET_ANY_GNU_TLS)
12480 {
12481 pic = gen_reg_rtx (Pmode);
12482 emit_insn (gen_set_got (pic));
12483 type = UNSPEC_GOTTPOFF;
12484 }
12485 else
12486 {
12487 pic = NULL;
12488 type = UNSPEC_INDNTPOFF;
12489 }
12490
12491 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12492 off = gen_rtx_CONST (Pmode, off);
12493 if (pic)
12494 off = gen_rtx_PLUS (Pmode, pic, off);
12495 off = gen_const_mem (Pmode, off);
12496 set_mem_alias_set (off, ix86_GOT_alias_set ());
12497
12498 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12499 {
12500 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12501 off = force_reg (Pmode, off);
12502 return gen_rtx_PLUS (Pmode, base, off);
12503 }
12504 else
12505 {
12506 base = get_thread_pointer (true);
12507 dest = gen_reg_rtx (Pmode);
12508 emit_insn (gen_subsi3 (dest, base, off));
12509 }
12510 break;
12511
12512 case TLS_MODEL_LOCAL_EXEC:
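/* Local exec: the offset from the thread pointer is known at link
   time (@tpoff/@ntpoff), so no GOT access is needed; on x86-64 this is
   roughly "movq %fs:0, %rax; leaq x@tpoff(%rax), %rax".  */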
12513 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12514 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12515 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12516 off = gen_rtx_CONST (Pmode, off);
12517
12518 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12519 {
12520 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12521 return gen_rtx_PLUS (Pmode, base, off);
12522 }
12523 else
12524 {
12525 base = get_thread_pointer (true);
12526 dest = gen_reg_rtx (Pmode);
12527 emit_insn (gen_subsi3 (dest, base, off));
12528 }
12529 break;
12530
12531 default:
12532 gcc_unreachable ();
12533 }
12534
12535 return dest;
12536 }
12537
12538 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12539 to symbol DECL. */
12540
12541 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12542 htab_t dllimport_map;
12543
12544 static tree
12545 get_dllimport_decl (tree decl)
12546 {
12547 struct tree_map *h, in;
12548 void **loc;
12549 const char *name;
12550 const char *prefix;
12551 size_t namelen, prefixlen;
12552 char *imp_name;
12553 tree to;
12554 rtx rtl;
12555
12556 if (!dllimport_map)
12557 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12558
12559 in.hash = htab_hash_pointer (decl);
12560 in.base.from = decl;
12561 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12562 h = (struct tree_map *) *loc;
12563 if (h)
12564 return h->to;
12565
12566 *loc = h = ggc_alloc_tree_map ();
12567 h->hash = in.hash;
12568 h->base.from = decl;
12569 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12570 VAR_DECL, NULL, ptr_type_node);
12571 DECL_ARTIFICIAL (to) = 1;
12572 DECL_IGNORED_P (to) = 1;
12573 DECL_EXTERNAL (to) = 1;
12574 TREE_READONLY (to) = 1;
12575
12576 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12577 name = targetm.strip_name_encoding (name);
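/* Fastcall symbols, and targets with an empty user label prefix, get
   no extra leading underscore; everything else needs the underscore
   that user_label_prefix would have added.  */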
12578 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12579 ? "*__imp_" : "*__imp__";
12580 namelen = strlen (name);
12581 prefixlen = strlen (prefix);
12582 imp_name = (char *) alloca (namelen + prefixlen + 1);
12583 memcpy (imp_name, prefix, prefixlen);
12584 memcpy (imp_name + prefixlen, name, namelen + 1);
12585
12586 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12587 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12588 SET_SYMBOL_REF_DECL (rtl, to);
12589 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12590
12591 rtl = gen_const_mem (Pmode, rtl);
12592 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12593
12594 SET_DECL_RTL (to, rtl);
12595 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12596
12597 return to;
12598 }
12599
12600 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12601 true if we require the result be a register. */
12602
12603 static rtx
12604 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12605 {
12606 tree imp_decl;
12607 rtx x;
12608
12609 gcc_assert (SYMBOL_REF_DECL (symbol));
12610 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12611
12612 x = DECL_RTL (imp_decl);
12613 if (want_reg)
12614 x = force_reg (Pmode, x);
12615 return x;
12616 }
12617
12618 /* Try machine-dependent ways of modifying an illegitimate address
12619 to be legitimate. If we find one, return the new, valid address.
12620 This macro is used in only one place: `memory_address' in explow.c.
12621
12622 OLDX is the address as it was before break_out_memory_refs was called.
12623 In some cases it is useful to look at this to decide what needs to be done.
12624
12625 It is always safe for this macro to do nothing. It exists to recognize
12626 opportunities to optimize the output.
12627
12628 For the 80386, we handle X+REG by loading X into a register R and
12629 using R+REG. R will go in a general reg and indexing will be used.
12630 However, if REG is a broken-out memory address or multiplication,
12631 nothing needs to be done because REG can certainly go in a general reg.
12632
12633 When -fpic is used, special handling is needed for symbolic references.
12634 See comments by legitimize_pic_address in i386.c for details. */
12635
12636 static rtx
12637 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12638 enum machine_mode mode)
12639 {
12640 int changed = 0;
12641 unsigned log;
12642
12643 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12644 if (log)
12645 return legitimize_tls_address (x, (enum tls_model) log, false);
12646 if (GET_CODE (x) == CONST
12647 && GET_CODE (XEXP (x, 0)) == PLUS
12648 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12649 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12650 {
12651 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12652 (enum tls_model) log, false);
12653 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12654 }
12655
12656 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12657 {
12658 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12659 return legitimize_dllimport_symbol (x, true);
12660 if (GET_CODE (x) == CONST
12661 && GET_CODE (XEXP (x, 0)) == PLUS
12662 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12663 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12664 {
12665 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12666 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12667 }
12668 }
12669
12670 if (flag_pic && SYMBOLIC_CONST (x))
12671 return legitimize_pic_address (x, 0);
12672
12673 #if TARGET_MACHO
12674 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12675 return machopic_indirect_data_reference (x, 0);
12676 #endif
12677
12678 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12679 if (GET_CODE (x) == ASHIFT
12680 && CONST_INT_P (XEXP (x, 1))
12681 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12682 {
12683 changed = 1;
12684 log = INTVAL (XEXP (x, 1));
12685 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12686 GEN_INT (1 << log));
12687 }
12688
12689 if (GET_CODE (x) == PLUS)
12690 {
12691 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12692
12693 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12694 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12695 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12696 {
12697 changed = 1;
12698 log = INTVAL (XEXP (XEXP (x, 0), 1));
12699 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12700 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12701 GEN_INT (1 << log));
12702 }
12703
12704 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12705 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12706 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12707 {
12708 changed = 1;
12709 log = INTVAL (XEXP (XEXP (x, 1), 1));
12710 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12711 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12712 GEN_INT (1 << log));
12713 }
12714
12715 /* Put multiply first if it isn't already. */
12716 if (GET_CODE (XEXP (x, 1)) == MULT)
12717 {
12718 rtx tmp = XEXP (x, 0);
12719 XEXP (x, 0) = XEXP (x, 1);
12720 XEXP (x, 1) = tmp;
12721 changed = 1;
12722 }
12723
12724 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12725 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12726 created by virtual register instantiation, register elimination, and
12727 similar optimizations. */
12728 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12729 {
12730 changed = 1;
12731 x = gen_rtx_PLUS (Pmode,
12732 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12733 XEXP (XEXP (x, 1), 0)),
12734 XEXP (XEXP (x, 1), 1));
12735 }
12736
12737 /* Canonicalize
12738 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12739 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12740 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12741 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12742 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12743 && CONSTANT_P (XEXP (x, 1)))
12744 {
12745 rtx constant;
12746 rtx other = NULL_RTX;
12747
12748 if (CONST_INT_P (XEXP (x, 1)))
12749 {
12750 constant = XEXP (x, 1);
12751 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12752 }
12753 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12754 {
12755 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12756 other = XEXP (x, 1);
12757 }
12758 else
12759 constant = 0;
12760
12761 if (constant)
12762 {
12763 changed = 1;
12764 x = gen_rtx_PLUS (Pmode,
12765 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12766 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12767 plus_constant (other, INTVAL (constant)));
12768 }
12769 }
12770
12771 if (changed && ix86_legitimate_address_p (mode, x, false))
12772 return x;
12773
12774 if (GET_CODE (XEXP (x, 0)) == MULT)
12775 {
12776 changed = 1;
12777 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12778 }
12779
12780 if (GET_CODE (XEXP (x, 1)) == MULT)
12781 {
12782 changed = 1;
12783 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12784 }
12785
12786 if (changed
12787 && REG_P (XEXP (x, 1))
12788 && REG_P (XEXP (x, 0)))
12789 return x;
12790
12791 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12792 {
12793 changed = 1;
12794 x = legitimize_pic_address (x, 0);
12795 }
12796
12797 if (changed && ix86_legitimate_address_p (mode, x, false))
12798 return x;
12799
12800 if (REG_P (XEXP (x, 0)))
12801 {
12802 rtx temp = gen_reg_rtx (Pmode);
12803 rtx val = force_operand (XEXP (x, 1), temp);
12804 if (val != temp)
12805 {
12806 if (GET_MODE (val) != Pmode)
12807 val = convert_to_mode (Pmode, val, 1);
12808 emit_move_insn (temp, val);
12809 }
12810
12811 XEXP (x, 1) = temp;
12812 return x;
12813 }
12814
12815 else if (REG_P (XEXP (x, 1)))
12816 {
12817 rtx temp = gen_reg_rtx (Pmode);
12818 rtx val = force_operand (XEXP (x, 0), temp);
12819 if (val != temp)
12820 {
12821 if (GET_MODE (val) != Pmode)
12822 val = convert_to_mode (Pmode, val, 1);
12823 emit_move_insn (temp, val);
12824 }
12825
12826 XEXP (x, 0) = temp;
12827 return x;
12828 }
12829 }
12830
12831 return x;
12832 }
12833 \f
12834 /* Print an integer constant expression in assembler syntax. Addition
12835 and subtraction are the only arithmetic that may appear in these
12836 expressions. FILE is the stdio stream to write to, X is the rtx, and
12837 CODE is the operand print code from the output string. */
12838
12839 static void
12840 output_pic_addr_const (FILE *file, rtx x, int code)
12841 {
12842 char buf[256];
12843
12844 switch (GET_CODE (x))
12845 {
12846 case PC:
12847 gcc_assert (flag_pic);
12848 putc ('.', file);
12849 break;
12850
12851 case SYMBOL_REF:
12852 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12853 output_addr_const (file, x);
12854 else
12855 {
12856 const char *name = XSTR (x, 0);
12857
12858 /* Mark the decl as referenced so that cgraph will
12859 output the function. */
12860 if (SYMBOL_REF_DECL (x))
12861 mark_decl_referenced (SYMBOL_REF_DECL (x));
12862
12863 #if TARGET_MACHO
12864 if (MACHOPIC_INDIRECT
12865 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12866 name = machopic_indirection_name (x, /*stub_p=*/true);
12867 #endif
12868 assemble_name (file, name);
12869 }
12870 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12871 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12872 fputs ("@PLT", file);
12873 break;
12874
12875 case LABEL_REF:
12876 x = XEXP (x, 0);
12877 /* FALLTHRU */
12878 case CODE_LABEL:
12879 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12880 assemble_name (asm_out_file, buf);
12881 break;
12882
12883 case CONST_INT:
12884 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12885 break;
12886
12887 case CONST:
12888 /* This used to output parentheses around the expression,
12889 but that does not work on the 386 (either ATT or BSD assembler). */
12890 output_pic_addr_const (file, XEXP (x, 0), code);
12891 break;
12892
12893 case CONST_DOUBLE:
12894 if (GET_MODE (x) == VOIDmode)
12895 {
12896 /* We can use %d if the number is <32 bits and positive. */
12897 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12898 fprintf (file, "0x%lx%08lx",
12899 (unsigned long) CONST_DOUBLE_HIGH (x),
12900 (unsigned long) CONST_DOUBLE_LOW (x));
12901 else
12902 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12903 }
12904 else
12905 /* We can't handle floating point constants;
12906 TARGET_PRINT_OPERAND must handle them. */
12907 output_operand_lossage ("floating constant misused");
12908 break;
12909
12910 case PLUS:
12911 /* Some assemblers need integer constants to appear first. */
12912 if (CONST_INT_P (XEXP (x, 0)))
12913 {
12914 output_pic_addr_const (file, XEXP (x, 0), code);
12915 putc ('+', file);
12916 output_pic_addr_const (file, XEXP (x, 1), code);
12917 }
12918 else
12919 {
12920 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12921 output_pic_addr_const (file, XEXP (x, 1), code);
12922 putc ('+', file);
12923 output_pic_addr_const (file, XEXP (x, 0), code);
12924 }
12925 break;
12926
12927 case MINUS:
12928 if (!TARGET_MACHO)
12929 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12930 output_pic_addr_const (file, XEXP (x, 0), code);
12931 putc ('-', file);
12932 output_pic_addr_const (file, XEXP (x, 1), code);
12933 if (!TARGET_MACHO)
12934 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12935 break;
12936
12937 case UNSPEC:
12938 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12939 {
12940 bool f = i386_asm_output_addr_const_extra (file, x);
12941 gcc_assert (f);
12942 break;
12943 }
12944
12945 gcc_assert (XVECLEN (x, 0) == 1);
12946 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12947 switch (XINT (x, 1))
12948 {
12949 case UNSPEC_GOT:
12950 fputs ("@GOT", file);
12951 break;
12952 case UNSPEC_GOTOFF:
12953 fputs ("@GOTOFF", file);
12954 break;
12955 case UNSPEC_PLTOFF:
12956 fputs ("@PLTOFF", file);
12957 break;
12958 case UNSPEC_PCREL:
12959 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12960 "(%rip)" : "[rip]", file);
12961 break;
12962 case UNSPEC_GOTPCREL:
12963 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12964 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12965 break;
12966 case UNSPEC_GOTTPOFF:
12967 /* FIXME: This might be @TPOFF in Sun ld too. */
12968 fputs ("@gottpoff", file);
12969 break;
12970 case UNSPEC_TPOFF:
12971 fputs ("@tpoff", file);
12972 break;
12973 case UNSPEC_NTPOFF:
12974 if (TARGET_64BIT)
12975 fputs ("@tpoff", file);
12976 else
12977 fputs ("@ntpoff", file);
12978 break;
12979 case UNSPEC_DTPOFF:
12980 fputs ("@dtpoff", file);
12981 break;
12982 case UNSPEC_GOTNTPOFF:
12983 if (TARGET_64BIT)
12984 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12985 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12986 else
12987 fputs ("@gotntpoff", file);
12988 break;
12989 case UNSPEC_INDNTPOFF:
12990 fputs ("@indntpoff", file);
12991 break;
12992 #if TARGET_MACHO
12993 case UNSPEC_MACHOPIC_OFFSET:
12994 putc ('-', file);
12995 machopic_output_function_base_name (file);
12996 break;
12997 #endif
12998 default:
12999 output_operand_lossage ("invalid UNSPEC as operand");
13000 break;
13001 }
13002 break;
13003
13004 default:
13005 output_operand_lossage ("invalid expression as operand");
13006 }
13007 }
13008
13009 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13010 We need to emit DTP-relative relocations. */
13011
13012 static void ATTRIBUTE_UNUSED
13013 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13014 {
13015 fputs (ASM_LONG, file);
13016 output_addr_const (file, x);
13017 fputs ("@dtpoff", file);
13018 switch (size)
13019 {
13020 case 4:
13021 break;
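/* An 8-byte slot gets the 4-byte @dtpoff value padded with a zero
   upper word.  */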
13022 case 8:
13023 fputs (", 0", file);
13024 break;
13025 default:
13026 gcc_unreachable ();
13027 }
13028 }
13029
13030 /* Return true if X is a representation of the PIC register. This copes
13031 with calls from ix86_find_base_term, where the register might have
13032 been replaced by a cselib value. */
13033
13034 static bool
13035 ix86_pic_register_p (rtx x)
13036 {
13037 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13038 return (pic_offset_table_rtx
13039 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13040 else
13041 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13042 }
13043
13044 /* Helper function for ix86_delegitimize_address.
13045 Attempt to delegitimize TLS local-exec accesses. */
13046
13047 static rtx
13048 ix86_delegitimize_tls_address (rtx orig_x)
13049 {
13050 rtx x = orig_x, unspec;
13051 struct ix86_address addr;
13052
13053 if (!TARGET_TLS_DIRECT_SEG_REFS)
13054 return orig_x;
13055 if (MEM_P (x))
13056 x = XEXP (x, 0);
13057 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13058 return orig_x;
13059 if (ix86_decompose_address (x, &addr) == 0
13060 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13061 || addr.disp == NULL_RTX
13062 || GET_CODE (addr.disp) != CONST)
13063 return orig_x;
13064 unspec = XEXP (addr.disp, 0);
13065 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13066 unspec = XEXP (unspec, 0);
13067 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13068 return orig_x;
13069 x = XVECEXP (unspec, 0, 0);
13070 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13071 if (unspec != XEXP (addr.disp, 0))
13072 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13073 if (addr.index)
13074 {
13075 rtx idx = addr.index;
13076 if (addr.scale != 1)
13077 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13078 x = gen_rtx_PLUS (Pmode, idx, x);
13079 }
13080 if (addr.base)
13081 x = gen_rtx_PLUS (Pmode, addr.base, x);
13082 if (MEM_P (orig_x))
13083 x = replace_equiv_address_nv (orig_x, x);
13084 return x;
13085 }
13086
13087 /* In the name of slightly smaller debug output, and to cater to
13088 general assembler lossage, recognize PIC+GOTOFF and turn it back
13089 into a direct symbol reference.
13090
13091 On Darwin, this is necessary to avoid a crash, because Darwin
13092 has a different PIC label for each routine but the DWARF debugging
13093 information is not associated with any particular routine, so it's
13094 necessary to remove references to the PIC label from RTL stored by
13095 the DWARF output code. */
13096
13097 static rtx
13098 ix86_delegitimize_address (rtx x)
13099 {
13100 rtx orig_x = delegitimize_mem_from_attrs (x);
13101 /* addend is NULL or some rtx if x is something+GOTOFF where
13102 something doesn't include the PIC register. */
13103 rtx addend = NULL_RTX;
13104 /* reg_addend is NULL or a multiple of some register. */
13105 rtx reg_addend = NULL_RTX;
13106 /* const_addend is NULL or a const_int. */
13107 rtx const_addend = NULL_RTX;
13108 /* This is the result, or NULL. */
13109 rtx result = NULL_RTX;
13110
13111 x = orig_x;
13112
13113 if (MEM_P (x))
13114 x = XEXP (x, 0);
13115
13116 if (TARGET_64BIT)
13117 {
13118 if (GET_CODE (x) != CONST
13119 || GET_CODE (XEXP (x, 0)) != UNSPEC
13120 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13121 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13122 || !MEM_P (orig_x))
13123 return ix86_delegitimize_tls_address (orig_x);
13124 x = XVECEXP (XEXP (x, 0), 0, 0);
13125 if (GET_MODE (orig_x) != GET_MODE (x))
13126 {
13127 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13128 GET_MODE (x), 0);
13129 if (x == NULL_RTX)
13130 return orig_x;
13131 }
13132 return x;
13133 }
13134
13135 if (GET_CODE (x) != PLUS
13136 || GET_CODE (XEXP (x, 1)) != CONST)
13137 return ix86_delegitimize_tls_address (orig_x);
13138
13139 if (ix86_pic_register_p (XEXP (x, 0)))
13140 /* %ebx + GOT/GOTOFF */
13141 ;
13142 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13143 {
13144 /* %ebx + %reg * scale + GOT/GOTOFF */
13145 reg_addend = XEXP (x, 0);
13146 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13147 reg_addend = XEXP (reg_addend, 1);
13148 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13149 reg_addend = XEXP (reg_addend, 0);
13150 else
13151 {
13152 reg_addend = NULL_RTX;
13153 addend = XEXP (x, 0);
13154 }
13155 }
13156 else
13157 addend = XEXP (x, 0);
13158
13159 x = XEXP (XEXP (x, 1), 0);
13160 if (GET_CODE (x) == PLUS
13161 && CONST_INT_P (XEXP (x, 1)))
13162 {
13163 const_addend = XEXP (x, 1);
13164 x = XEXP (x, 0);
13165 }
13166
13167 if (GET_CODE (x) == UNSPEC
13168 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13169 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13170 result = XVECEXP (x, 0, 0);
13171
13172 if (TARGET_MACHO && darwin_local_data_pic (x)
13173 && !MEM_P (orig_x))
13174 result = XVECEXP (x, 0, 0);
13175
13176 if (! result)
13177 return ix86_delegitimize_tls_address (orig_x);
13178
13179 if (const_addend)
13180 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13181 if (reg_addend)
13182 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13183 if (addend)
13184 {
13185 /* If the rest of original X doesn't involve the PIC register, add
13186 addend and subtract pic_offset_table_rtx. This can happen e.g.
13187 for code like:
13188 leal (%ebx, %ecx, 4), %ecx
13189 ...
13190 movl foo@GOTOFF(%ecx), %edx
13191 in which case we return (%ecx - %ebx) + foo. */
13192 if (pic_offset_table_rtx)
13193 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13194 pic_offset_table_rtx),
13195 result);
13196 else
13197 return orig_x;
13198 }
13199 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13200 {
13201 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13202 if (result == NULL_RTX)
13203 return orig_x;
13204 }
13205 return result;
13206 }
13207
13208 /* If X is a machine specific address (i.e. a symbol or label being
13209 referenced as a displacement from the GOT implemented using an
13210 UNSPEC), then return the base term. Otherwise return X. */
13211
13212 rtx
13213 ix86_find_base_term (rtx x)
13214 {
13215 rtx term;
13216
13217 if (TARGET_64BIT)
13218 {
13219 if (GET_CODE (x) != CONST)
13220 return x;
13221 term = XEXP (x, 0);
13222 if (GET_CODE (term) == PLUS
13223 && (CONST_INT_P (XEXP (term, 1))
13224 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13225 term = XEXP (term, 0);
13226 if (GET_CODE (term) != UNSPEC
13227 || (XINT (term, 1) != UNSPEC_GOTPCREL
13228 && XINT (term, 1) != UNSPEC_PCREL))
13229 return x;
13230
13231 return XVECEXP (term, 0, 0);
13232 }
13233
13234 return ix86_delegitimize_address (x);
13235 }
13236 \f
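/* Print to FILE the condition-code suffix (e.g. "e", "g", "b") matching
   comparison CODE in mode MODE.  REVERSE selects the reversed condition;
   FP selects the fcmov-style spellings used after floating-point
   compares.  */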
13237 static void
13238 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13239 int fp, FILE *file)
13240 {
13241 const char *suffix;
13242
13243 if (mode == CCFPmode || mode == CCFPUmode)
13244 {
13245 code = ix86_fp_compare_code_to_integer (code);
13246 mode = CCmode;
13247 }
13248 if (reverse)
13249 code = reverse_condition (code);
13250
13251 switch (code)
13252 {
13253 case EQ:
13254 switch (mode)
13255 {
13256 case CCAmode:
13257 suffix = "a";
13258 break;
13259
13260 case CCCmode:
13261 suffix = "c";
13262 break;
13263
13264 case CCOmode:
13265 suffix = "o";
13266 break;
13267
13268 case CCSmode:
13269 suffix = "s";
13270 break;
13271
13272 default:
13273 suffix = "e";
13274 }
13275 break;
13276 case NE:
13277 switch (mode)
13278 {
13279 case CCAmode:
13280 suffix = "na";
13281 break;
13282
13283 case CCCmode:
13284 suffix = "nc";
13285 break;
13286
13287 case CCOmode:
13288 suffix = "no";
13289 break;
13290
13291 case CCSmode:
13292 suffix = "ns";
13293 break;
13294
13295 default:
13296 suffix = "ne";
13297 }
13298 break;
13299 case GT:
13300 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13301 suffix = "g";
13302 break;
13303 case GTU:
13304 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13305 Those same assemblers have the same but opposite lossage on cmov. */
13306 if (mode == CCmode)
13307 suffix = fp ? "nbe" : "a";
13308 else if (mode == CCCmode)
13309 suffix = "b";
13310 else
13311 gcc_unreachable ();
13312 break;
13313 case LT:
13314 switch (mode)
13315 {
13316 case CCNOmode:
13317 case CCGOCmode:
13318 suffix = "s";
13319 break;
13320
13321 case CCmode:
13322 case CCGCmode:
13323 suffix = "l";
13324 break;
13325
13326 default:
13327 gcc_unreachable ();
13328 }
13329 break;
13330 case LTU:
13331 gcc_assert (mode == CCmode || mode == CCCmode);
13332 suffix = "b";
13333 break;
13334 case GE:
13335 switch (mode)
13336 {
13337 case CCNOmode:
13338 case CCGOCmode:
13339 suffix = "ns";
13340 break;
13341
13342 case CCmode:
13343 case CCGCmode:
13344 suffix = "ge";
13345 break;
13346
13347 default:
13348 gcc_unreachable ();
13349 }
13350 break;
13351 case GEU:
13352 /* ??? As above. */
13353 gcc_assert (mode == CCmode || mode == CCCmode);
13354 suffix = fp ? "nb" : "ae";
13355 break;
13356 case LE:
13357 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13358 suffix = "le";
13359 break;
13360 case LEU:
13361 /* ??? As above. */
13362 if (mode == CCmode)
13363 suffix = "be";
13364 else if (mode == CCCmode)
13365 suffix = fp ? "nb" : "ae";
13366 else
13367 gcc_unreachable ();
13368 break;
13369 case UNORDERED:
13370 suffix = fp ? "u" : "p";
13371 break;
13372 case ORDERED:
13373 suffix = fp ? "nu" : "np";
13374 break;
13375 default:
13376 gcc_unreachable ();
13377 }
13378 fputs (suffix, file);
13379 }
13380
13381 /* Print the name of register X to FILE based on its machine mode and number.
13382 If CODE is 'w', pretend the mode is HImode.
13383 If CODE is 'b', pretend the mode is QImode.
13384 If CODE is 'k', pretend the mode is SImode.
13385 If CODE is 'q', pretend the mode is DImode.
13386 If CODE is 'x', pretend the mode is V4SFmode.
13387 If CODE is 't', pretend the mode is V8SFmode.
13388 If CODE is 'h', pretend the reg is the 'high' byte register.
13389 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
13390 If CODE is 'd', duplicate the operand for an AVX instruction.
13391 */
13392
13393 void
13394 print_reg (rtx x, int code, FILE *file)
13395 {
13396 const char *reg;
13397 bool duplicated = code == 'd' && TARGET_AVX;
13398
13399 gcc_assert (x == pc_rtx
13400 || (REGNO (x) != ARG_POINTER_REGNUM
13401 && REGNO (x) != FRAME_POINTER_REGNUM
13402 && REGNO (x) != FLAGS_REG
13403 && REGNO (x) != FPSR_REG
13404 && REGNO (x) != FPCR_REG));
13405
13406 if (ASSEMBLER_DIALECT == ASM_ATT)
13407 putc ('%', file);
13408
13409 if (x == pc_rtx)
13410 {
13411 gcc_assert (TARGET_64BIT);
13412 fputs ("rip", file);
13413 return;
13414 }
13415
13416 if (code == 'w' || MMX_REG_P (x))
13417 code = 2;
13418 else if (code == 'b')
13419 code = 1;
13420 else if (code == 'k')
13421 code = 4;
13422 else if (code == 'q')
13423 code = 8;
13424 else if (code == 'y')
13425 code = 3;
13426 else if (code == 'h')
13427 code = 0;
13428 else if (code == 'x')
13429 code = 16;
13430 else if (code == 't')
13431 code = 32;
13432 else
13433 code = GET_MODE_SIZE (GET_MODE (x));
13434
13435 /* Irritatingly, AMD extended registers use a different naming convention
13436 from the normal registers. */
13437 if (REX_INT_REG_P (x))
13438 {
13439 gcc_assert (TARGET_64BIT);
13440 switch (code)
13441 {
13442 case 0:
13443 error ("extended registers have no high halves");
13444 break;
13445 case 1:
13446 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13447 break;
13448 case 2:
13449 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13450 break;
13451 case 4:
13452 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13453 break;
13454 case 8:
13455 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13456 break;
13457 default:
13458 error ("unsupported operand size for extended register");
13459 break;
13460 }
13461 return;
13462 }
13463
13464 reg = NULL;
13465 switch (code)
13466 {
13467 case 3:
13468 if (STACK_TOP_P (x))
13469 {
13470 reg = "st(0)";
13471 break;
13472 }
13473 /* FALLTHRU */
13474 case 8:
13475 case 4:
13476 case 12:
13477 if (! ANY_FP_REG_P (x))
13478 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13479 /* FALLTHRU */
13480 case 16:
13481 case 2:
13482 normal:
13483 reg = hi_reg_name[REGNO (x)];
13484 break;
13485 case 1:
13486 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13487 goto normal;
13488 reg = qi_reg_name[REGNO (x)];
13489 break;
13490 case 0:
13491 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13492 goto normal;
13493 reg = qi_high_reg_name[REGNO (x)];
13494 break;
13495 case 32:
13496 if (SSE_REG_P (x))
13497 {
13498 gcc_assert (!duplicated);
13499 putc ('y', file);
13500 fputs (hi_reg_name[REGNO (x)] + 1, file);
13501 return;
13502 }
13503 break;
13504 default:
13505 gcc_unreachable ();
13506 }
13507
13508 fputs (reg, file);
13509 if (duplicated)
13510 {
13511 if (ASSEMBLER_DIALECT == ASM_ATT)
13512 fprintf (file, ", %%%s", reg);
13513 else
13514 fprintf (file, ", %s", reg);
13515 }
13516 }
13517
13518 /* Locate some local-dynamic symbol still in use by this function
13519 so that we can print its name in some tls_local_dynamic_base
13520 pattern. */
13521
13522 static int
13523 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13524 {
13525 rtx x = *px;
13526
13527 if (GET_CODE (x) == SYMBOL_REF
13528 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13529 {
13530 cfun->machine->some_ld_name = XSTR (x, 0);
13531 return 1;
13532 }
13533
13534 return 0;
13535 }
13536
13537 static const char *
13538 get_some_local_dynamic_name (void)
13539 {
13540 rtx insn;
13541
13542 if (cfun->machine->some_ld_name)
13543 return cfun->machine->some_ld_name;
13544
13545 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13546 if (NONDEBUG_INSN_P (insn)
13547 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13548 return cfun->machine->some_ld_name;
13549
13550 return NULL;
13551 }
13552
13553 /* Meaning of CODE:
13554 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13555 C -- print opcode suffix for set/cmov insn.
13556 c -- like C, but print reversed condition
13557 F,f -- likewise, but for floating-point.
13558 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13559 otherwise nothing
13560 R -- print the prefix for register names.
13561 z -- print the opcode suffix for the size of the current operand.
13562 Z -- likewise, with special suffixes for x87 instructions.
13563 * -- print a star (in certain assembler syntax)
13564 A -- print an absolute memory reference.
13565 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13566 s -- print a shift double count, followed by the assembler's argument
13567 delimiter.
13568 b -- print the QImode name of the register for the indicated operand.
13569 %b0 would print %al if operands[0] is reg 0.
13570 w -- likewise, print the HImode name of the register.
13571 k -- likewise, print the SImode name of the register.
13572 q -- likewise, print the DImode name of the register.
13573 x -- likewise, print the V4SFmode name of the register.
13574 t -- likewise, print the V8SFmode name of the register.
13575 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13576 y -- print "st(0)" instead of "st" as a register.
13577 d -- print duplicated register operand for AVX instruction.
13578 D -- print condition for SSE cmp instruction.
13579 P -- if PIC, print an @PLT suffix.
13580 p -- print raw symbol name.
13581 X -- don't print any sort of PIC '@' suffix for a symbol.
13582 & -- print some in-use local-dynamic symbol name.
13583 H -- print a memory address offset by 8; used for sse high-parts
13584 Y -- print condition for XOP pcom* instruction.
13585 + -- print a branch hint as 'cs' or 'ds' prefix
13586 ; -- print a semicolon (after prefixes due to bug in older gas).
13587 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13588 @ -- print a segment register of thread base pointer load
13589 */
13590
13591 void
13592 ix86_print_operand (FILE *file, rtx x, int code)
13593 {
13594 if (code)
13595 {
13596 switch (code)
13597 {
13598 case '*':
13599 if (ASSEMBLER_DIALECT == ASM_ATT)
13600 putc ('*', file);
13601 return;
13602
13603 case '&':
13604 {
13605 const char *name = get_some_local_dynamic_name ();
13606 if (name == NULL)
13607 output_operand_lossage ("'%%&' used without any "
13608 "local dynamic TLS references");
13609 else
13610 assemble_name (file, name);
13611 return;
13612 }
13613
13614 case 'A':
13615 switch (ASSEMBLER_DIALECT)
13616 {
13617 case ASM_ATT:
13618 putc ('*', file);
13619 break;
13620
13621 case ASM_INTEL:
13622 /* Intel syntax. For absolute addresses, registers should not
13623 be surrounded by brackets. */
13624 if (!REG_P (x))
13625 {
13626 putc ('[', file);
13627 ix86_print_operand (file, x, 0);
13628 putc (']', file);
13629 return;
13630 }
13631 break;
13632
13633 default:
13634 gcc_unreachable ();
13635 }
13636
13637 ix86_print_operand (file, x, 0);
13638 return;
13639
13640
13641 case 'L':
13642 if (ASSEMBLER_DIALECT == ASM_ATT)
13643 putc ('l', file);
13644 return;
13645
13646 case 'W':
13647 if (ASSEMBLER_DIALECT == ASM_ATT)
13648 putc ('w', file);
13649 return;
13650
13651 case 'B':
13652 if (ASSEMBLER_DIALECT == ASM_ATT)
13653 putc ('b', file);
13654 return;
13655
13656 case 'Q':
13657 if (ASSEMBLER_DIALECT == ASM_ATT)
13658 putc ('l', file);
13659 return;
13660
13661 case 'S':
13662 if (ASSEMBLER_DIALECT == ASM_ATT)
13663 putc ('s', file);
13664 return;
13665
13666 case 'T':
13667 if (ASSEMBLER_DIALECT == ASM_ATT)
13668 putc ('t', file);
13669 return;
13670
13671 case 'z':
13672 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13673 {
13674 /* Opcodes don't get size suffixes when using Intel syntax. */
13675 if (ASSEMBLER_DIALECT == ASM_INTEL)
13676 return;
13677
13678 switch (GET_MODE_SIZE (GET_MODE (x)))
13679 {
13680 case 1:
13681 putc ('b', file);
13682 return;
13683
13684 case 2:
13685 putc ('w', file);
13686 return;
13687
13688 case 4:
13689 putc ('l', file);
13690 return;
13691
13692 case 8:
13693 putc ('q', file);
13694 return;
13695
13696 default:
13697 output_operand_lossage
13698 ("invalid operand size for operand code '%c'", code);
13699 return;
13700 }
13701 }
13702
13703 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13704 warning
13705 (0, "non-integer operand used with operand code '%c'", code);
13706 /* FALLTHRU */
13707
13708 case 'Z':
13709 /* 387 opcodes don't get size suffixes when using Intel syntax. */
13710 if (ASSEMBLER_DIALECT == ASM_INTEL)
13711 return;
13712
13713 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13714 {
13715 switch (GET_MODE_SIZE (GET_MODE (x)))
13716 {
13717 case 2:
13718 #ifdef HAVE_AS_IX86_FILDS
13719 putc ('s', file);
13720 #endif
13721 return;
13722
13723 case 4:
13724 putc ('l', file);
13725 return;
13726
13727 case 8:
13728 #ifdef HAVE_AS_IX86_FILDQ
13729 putc ('q', file);
13730 #else
13731 fputs ("ll", file);
13732 #endif
13733 return;
13734
13735 default:
13736 break;
13737 }
13738 }
13739 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13740 {
13741 /* 387 opcodes don't get size suffixes
13742 if the operands are registers. */
13743 if (STACK_REG_P (x))
13744 return;
13745
13746 switch (GET_MODE_SIZE (GET_MODE (x)))
13747 {
13748 case 4:
13749 putc ('s', file);
13750 return;
13751
13752 case 8:
13753 putc ('l', file);
13754 return;
13755
13756 case 12:
13757 case 16:
13758 putc ('t', file);
13759 return;
13760
13761 default:
13762 break;
13763 }
13764 }
13765 else
13766 {
13767 output_operand_lossage
13768 ("invalid operand type used with operand code '%c'", code);
13769 return;
13770 }
13771
13772 output_operand_lossage
13773 ("invalid operand size for operand code '%c'", code);
13774 return;
13775
13776 case 'd':
13777 case 'b':
13778 case 'w':
13779 case 'k':
13780 case 'q':
13781 case 'h':
13782 case 't':
13783 case 'y':
13784 case 'x':
13785 case 'X':
13786 case 'P':
13787 case 'p':
13788 break;
13789
13790 case 's':
13791 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13792 {
13793 ix86_print_operand (file, x, 0);
13794 fputs (", ", file);
13795 }
13796 return;
13797
13798 case 'D':
13799 /* A little bit of brain damage here. The SSE compare instructions
13800 use completely different names for the comparisons than the
13801 fp conditional moves do. */
13802 if (TARGET_AVX)
13803 {
13804 switch (GET_CODE (x))
13805 {
13806 case EQ:
13807 fputs ("eq", file);
13808 break;
13809 case UNEQ:
13810 fputs ("eq_us", file);
13811 break;
13812 case LT:
13813 fputs ("lt", file);
13814 break;
13815 case UNLT:
13816 fputs ("nge", file);
13817 break;
13818 case LE:
13819 fputs ("le", file);
13820 break;
13821 case UNLE:
13822 fputs ("ngt", file);
13823 break;
13824 case UNORDERED:
13825 fputs ("unord", file);
13826 break;
13827 case NE:
13828 fputs ("neq", file);
13829 break;
13830 case LTGT:
13831 fputs ("neq_oq", file);
13832 break;
13833 case GE:
13834 fputs ("ge", file);
13835 break;
13836 case UNGE:
13837 fputs ("nlt", file);
13838 break;
13839 case GT:
13840 fputs ("gt", file);
13841 break;
13842 case UNGT:
13843 fputs ("nle", file);
13844 break;
13845 case ORDERED:
13846 fputs ("ord", file);
13847 break;
13848 default:
13849 output_operand_lossage ("operand is not a condition code, "
13850 "invalid operand code 'D'");
13851 return;
13852 }
13853 }
13854 else
13855 {
13856 switch (GET_CODE (x))
13857 {
13858 case EQ:
13859 case UNEQ:
13860 fputs ("eq", file);
13861 break;
13862 case LT:
13863 case UNLT:
13864 fputs ("lt", file);
13865 break;
13866 case LE:
13867 case UNLE:
13868 fputs ("le", file);
13869 break;
13870 case UNORDERED:
13871 fputs ("unord", file);
13872 break;
13873 case NE:
13874 case LTGT:
13875 fputs ("neq", file);
13876 break;
13877 case UNGE:
13878 case GE:
13879 fputs ("nlt", file);
13880 break;
13881 case UNGT:
13882 case GT:
13883 fputs ("nle", file);
13884 break;
13885 case ORDERED:
13886 fputs ("ord", file);
13887 break;
13888 default:
13889 output_operand_lossage ("operand is not a condition code, "
13890 "invalid operand code 'D'");
13891 return;
13892 }
13893 }
13894 return;
13895 case 'O':
13896 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13897 if (ASSEMBLER_DIALECT == ASM_ATT)
13898 {
13899 switch (GET_MODE (x))
13900 {
13901 case HImode: putc ('w', file); break;
13902 case SImode:
13903 case SFmode: putc ('l', file); break;
13904 case DImode:
13905 case DFmode: putc ('q', file); break;
13906 default: gcc_unreachable ();
13907 }
13908 putc ('.', file);
13909 }
13910 #endif
13911 return;
13912 case 'C':
13913 if (!COMPARISON_P (x))
13914 {
13915 output_operand_lossage ("operand is neither a constant nor a "
13916 "condition code, invalid operand code "
13917 "'C'");
13918 return;
13919 }
13920 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13921 return;
13922 case 'F':
13923 if (!COMPARISON_P (x))
13924 {
13925 output_operand_lossage ("operand is neither a constant nor a "
13926 "condition code, invalid operand code "
13927 "'F'");
13928 return;
13929 }
13930 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13931 if (ASSEMBLER_DIALECT == ASM_ATT)
13932 putc ('.', file);
13933 #endif
13934 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13935 return;
13936
13937 /* Like above, but reverse condition */
13938 case 'c':
13939 /* Check to see if argument to %c is really a constant
13940 and not a condition code which needs to be reversed. */
13941 if (!COMPARISON_P (x))
13942 {
13943 output_operand_lossage ("operand is neither a constant nor a "
13944 "condition code, invalid operand "
13945 "code 'c'");
13946 return;
13947 }
13948 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13949 return;
13950 case 'f':
13951 if (!COMPARISON_P (x))
13952 {
13953 output_operand_lossage ("operand is neither a constant nor a "
13954 "condition code, invalid operand "
13955 "code 'f'");
13956 return;
13957 }
13958 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13959 if (ASSEMBLER_DIALECT == ASM_ATT)
13960 putc ('.', file);
13961 #endif
13962 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13963 return;
13964
13965 case 'H':
13966 /* It doesn't actually matter what mode we use here, as we're
13967 only going to use this for printing. */
13968 x = adjust_address_nv (x, DImode, 8);
13969 break;
13970
13971 case '+':
13972 {
13973 rtx x;
13974
13975 if (!optimize
13976 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13977 return;
13978
13979 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13980 if (x)
13981 {
13982 int pred_val = INTVAL (XEXP (x, 0));
13983
13984 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13985 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13986 {
13987 int taken = pred_val > REG_BR_PROB_BASE / 2;
13988 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13989
13990 /* Emit hints only in the case where the default branch prediction
13991 heuristics would fail. */
13992 if (taken != cputaken)
13993 {
13994 /* We use 3e (DS) prefix for taken branches and
13995 2e (CS) prefix for not taken branches. */
13996 if (taken)
13997 fputs ("ds ; ", file);
13998 else
13999 fputs ("cs ; ", file);
14000 }
14001 }
14002 }
14003 return;
14004 }
14005
14006 case 'Y':
14007 switch (GET_CODE (x))
14008 {
14009 case NE:
14010 fputs ("neq", file);
14011 break;
14012 case EQ:
14013 fputs ("eq", file);
14014 break;
14015 case GE:
14016 case GEU:
14017 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14018 break;
14019 case GT:
14020 case GTU:
14021 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14022 break;
14023 case LE:
14024 case LEU:
14025 fputs ("le", file);
14026 break;
14027 case LT:
14028 case LTU:
14029 fputs ("lt", file);
14030 break;
14031 case UNORDERED:
14032 fputs ("unord", file);
14033 break;
14034 case ORDERED:
14035 fputs ("ord", file);
14036 break;
14037 case UNEQ:
14038 fputs ("ueq", file);
14039 break;
14040 case UNGE:
14041 fputs ("nlt", file);
14042 break;
14043 case UNGT:
14044 fputs ("nle", file);
14045 break;
14046 case UNLE:
14047 fputs ("ule", file);
14048 break;
14049 case UNLT:
14050 fputs ("ult", file);
14051 break;
14052 case LTGT:
14053 fputs ("une", file);
14054 break;
14055 default:
14056 output_operand_lossage ("operand is not a condition code, "
14057 "invalid operand code 'Y'");
14058 return;
14059 }
14060 return;
14061
14062 case ';':
14063 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14064 putc (';', file);
14065 #endif
14066 return;
14067
14068 case '@':
14069 if (ASSEMBLER_DIALECT == ASM_ATT)
14070 putc ('%', file);
14071
14072 /* The kernel uses a different segment register for performance
14073 reasons; a system call would not have to trash the userspace
14074 segment register, which would be expensive. */
14075 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14076 fputs ("fs", file);
14077 else
14078 fputs ("gs", file);
14079 return;
14080
14081 case '~':
14082 putc (TARGET_AVX2 ? 'i' : 'f', file);
14083 return;
14084
14085 default:
14086 output_operand_lossage ("invalid operand code '%c'", code);
14087 }
14088 }
14089
14090 if (REG_P (x))
14091 print_reg (x, code, file);
14092
14093 else if (MEM_P (x))
14094 {
14095 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14096 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14097 && GET_MODE (x) != BLKmode)
14098 {
14099 const char * size;
14100 switch (GET_MODE_SIZE (GET_MODE (x)))
14101 {
14102 case 1: size = "BYTE"; break;
14103 case 2: size = "WORD"; break;
14104 case 4: size = "DWORD"; break;
14105 case 8: size = "QWORD"; break;
14106 case 12: size = "TBYTE"; break;
14107 case 16:
14108 if (GET_MODE (x) == XFmode)
14109 size = "TBYTE";
14110 else
14111 size = "XMMWORD";
14112 break;
14113 case 32: size = "YMMWORD"; break;
14114 default:
14115 gcc_unreachable ();
14116 }
14117
14118 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14119 if (code == 'b')
14120 size = "BYTE";
14121 else if (code == 'w')
14122 size = "WORD";
14123 else if (code == 'k')
14124 size = "DWORD";
14125
14126 fputs (size, file);
14127 fputs (" PTR ", file);
14128 }
14129
14130 x = XEXP (x, 0);
14131 /* Avoid (%rip) for call operands. */
14132 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14133 && !CONST_INT_P (x))
14134 output_addr_const (file, x);
14135 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14136 output_operand_lossage ("invalid constraints for operand");
14137 else
14138 output_address (x);
14139 }
14140
14141 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14142 {
14143 REAL_VALUE_TYPE r;
14144 long l;
14145
14146 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14147 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14148
14149 if (ASSEMBLER_DIALECT == ASM_ATT)
14150 putc ('$', file);
14151 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14152 if (code == 'q')
14153 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14154 else
14155 fprintf (file, "0x%08x", (unsigned int) l);
14156 }
14157
14158 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14159 {
14160 REAL_VALUE_TYPE r;
14161 long l[2];
14162
14163 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14164 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14165
14166 if (ASSEMBLER_DIALECT == ASM_ATT)
14167 putc ('$', file);
14168 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14169 }
14170
14171 /* These float cases don't actually occur as immediate operands. */
14172 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14173 {
14174 char dstr[30];
14175
14176 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14177 fputs (dstr, file);
14178 }
14179
14180 else
14181 {
14182 /* We have patterns that allow zero sets of memory, for instance.
14183 In 64-bit mode, we should probably support all 8-byte vectors,
14184 since we can in fact encode that into an immediate. */
14185 if (GET_CODE (x) == CONST_VECTOR)
14186 {
14187 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14188 x = const0_rtx;
14189 }
14190
14191 if (code != 'P' && code != 'p')
14192 {
14193 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14194 {
14195 if (ASSEMBLER_DIALECT == ASM_ATT)
14196 putc ('$', file);
14197 }
14198 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14199 || GET_CODE (x) == LABEL_REF)
14200 {
14201 if (ASSEMBLER_DIALECT == ASM_ATT)
14202 putc ('$', file);
14203 else
14204 fputs ("OFFSET FLAT:", file);
14205 }
14206 }
14207 if (CONST_INT_P (x))
14208 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14209 else if (flag_pic || MACHOPIC_INDIRECT)
14210 output_pic_addr_const (file, x, code);
14211 else
14212 output_addr_const (file, x);
14213 }
14214 }
14215
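/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook: return true for
   the punctuation characters that ix86_print_operand handles.  */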
14216 static bool
14217 ix86_print_operand_punct_valid_p (unsigned char code)
14218 {
14219 return (code == '@' || code == '*' || code == '+'
14220 || code == '&' || code == ';' || code == '~');
14221 }
14222 \f
14223 /* Print a memory operand whose address is ADDR. */
14224
14225 static void
14226 ix86_print_operand_address (FILE *file, rtx addr)
14227 {
14228 struct ix86_address parts;
14229 rtx base, index, disp;
14230 int scale;
14231 int ok = ix86_decompose_address (addr, &parts);
14232
14233 gcc_assert (ok);
14234
14235 if (parts.base && GET_CODE (parts.base) == SUBREG)
14236 {
14237 rtx tmp = SUBREG_REG (parts.base);
14238 parts.base = simplify_subreg (GET_MODE (parts.base),
14239 tmp, GET_MODE (tmp), 0);
14240 }
14241
14242 if (parts.index && GET_CODE (parts.index) == SUBREG)
14243 {
14244 rtx tmp = SUBREG_REG (parts.index);
14245 parts.index = simplify_subreg (GET_MODE (parts.index),
14246 tmp, GET_MODE (tmp), 0);
14247 }
14248
14249 base = parts.base;
14250 index = parts.index;
14251 disp = parts.disp;
14252 scale = parts.scale;
14253
14254 switch (parts.seg)
14255 {
14256 case SEG_DEFAULT:
14257 break;
14258 case SEG_FS:
14259 case SEG_GS:
14260 if (ASSEMBLER_DIALECT == ASM_ATT)
14261 putc ('%', file);
14262 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14263 break;
14264 default:
14265 gcc_unreachable ();
14266 }
14267
14268 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14269 if (TARGET_64BIT && !base && !index)
14270 {
14271 rtx symbol = disp;
14272
14273 if (GET_CODE (disp) == CONST
14274 && GET_CODE (XEXP (disp, 0)) == PLUS
14275 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14276 symbol = XEXP (XEXP (disp, 0), 0);
14277
14278 if (GET_CODE (symbol) == LABEL_REF
14279 || (GET_CODE (symbol) == SYMBOL_REF
14280 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14281 base = pc_rtx;
14282 }
14283 if (!base && !index)
14284 {
14285 /* A displacement-only address requires special attention. */
14286
14287 if (CONST_INT_P (disp))
14288 {
14289 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14290 fputs ("ds:", file);
14291 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14292 }
14293 else if (flag_pic)
14294 output_pic_addr_const (file, disp, 0);
14295 else
14296 output_addr_const (file, disp);
14297 }
14298 else
14299 {
14300 int code = 0;
14301
14302 /* Print SImode registers for zero-extended addresses to force
14303 addr32 prefix. Otherwise print DImode registers to avoid it. */
14304 if (TARGET_64BIT)
14305 code = ((GET_CODE (addr) == ZERO_EXTEND
14306 || GET_CODE (addr) == AND)
14307 ? 'l'
14308 : 'q');
14309
14310 if (ASSEMBLER_DIALECT == ASM_ATT)
14311 {
14312 if (disp)
14313 {
14314 if (flag_pic)
14315 output_pic_addr_const (file, disp, 0);
14316 else if (GET_CODE (disp) == LABEL_REF)
14317 output_asm_label (disp);
14318 else
14319 output_addr_const (file, disp);
14320 }
14321
14322 putc ('(', file);
14323 if (base)
14324 print_reg (base, code, file);
14325 if (index)
14326 {
14327 putc (',', file);
14328 print_reg (index, code, file);
14329 if (scale != 1)
14330 fprintf (file, ",%d", scale);
14331 }
14332 putc (')', file);
14333 }
14334 else
14335 {
14336 rtx offset = NULL_RTX;
14337
14338 if (disp)
14339 {
14340 /* Pull out the offset of a symbol; print any symbol itself. */
14341 if (GET_CODE (disp) == CONST
14342 && GET_CODE (XEXP (disp, 0)) == PLUS
14343 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14344 {
14345 offset = XEXP (XEXP (disp, 0), 1);
14346 disp = gen_rtx_CONST (VOIDmode,
14347 XEXP (XEXP (disp, 0), 0));
14348 }
14349
14350 if (flag_pic)
14351 output_pic_addr_const (file, disp, 0);
14352 else if (GET_CODE (disp) == LABEL_REF)
14353 output_asm_label (disp);
14354 else if (CONST_INT_P (disp))
14355 offset = disp;
14356 else
14357 output_addr_const (file, disp);
14358 }
14359
14360 putc ('[', file);
14361 if (base)
14362 {
14363 print_reg (base, code, file);
14364 if (offset)
14365 {
14366 if (INTVAL (offset) >= 0)
14367 putc ('+', file);
14368 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14369 }
14370 }
14371 else if (offset)
14372 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14373 else
14374 putc ('0', file);
14375
14376 if (index)
14377 {
14378 putc ('+', file);
14379 print_reg (index, code, file);
14380 if (scale != 1)
14381 fprintf (file, "*%d", scale);
14382 }
14383 putc (']', file);
14384 }
14385 }
14386 }
14387
14388 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14389
14390 static bool
14391 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14392 {
14393 rtx op;
14394
14395 if (GET_CODE (x) != UNSPEC)
14396 return false;
14397
14398 op = XVECEXP (x, 0, 0);
14399 switch (XINT (x, 1))
14400 {
14401 case UNSPEC_GOTTPOFF:
14402 output_addr_const (file, op);
14403 /* FIXME: This might be @TPOFF in Sun ld. */
14404 fputs ("@gottpoff", file);
14405 break;
14406 case UNSPEC_TPOFF:
14407 output_addr_const (file, op);
14408 fputs ("@tpoff", file);
14409 break;
14410 case UNSPEC_NTPOFF:
14411 output_addr_const (file, op);
14412 if (TARGET_64BIT)
14413 fputs ("@tpoff", file);
14414 else
14415 fputs ("@ntpoff", file);
14416 break;
14417 case UNSPEC_DTPOFF:
14418 output_addr_const (file, op);
14419 fputs ("@dtpoff", file);
14420 break;
14421 case UNSPEC_GOTNTPOFF:
14422 output_addr_const (file, op);
14423 if (TARGET_64BIT)
14424 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14425 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14426 else
14427 fputs ("@gotntpoff", file);
14428 break;
14429 case UNSPEC_INDNTPOFF:
14430 output_addr_const (file, op);
14431 fputs ("@indntpoff", file);
14432 break;
14433 #if TARGET_MACHO
14434 case UNSPEC_MACHOPIC_OFFSET:
14435 output_addr_const (file, op);
14436 putc ('-', file);
14437 machopic_output_function_base_name (file);
14438 break;
14439 #endif
14440
14441 case UNSPEC_STACK_CHECK:
14442 {
14443 int offset;
14444
14445 gcc_assert (flag_split_stack);
14446
14447 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14448 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14449 #else
14450 gcc_unreachable ();
14451 #endif
14452
14453 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14454 }
14455 break;
14456
14457 default:
14458 return false;
14459 }
14460
14461 return true;
14462 }
14463 \f
14464 /* Split one or more double-mode RTL references into pairs of half-mode
14465 references. The RTL can be REG, offsettable MEM, integer constant, or
14466 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14467 split and "num" is its length. lo_half and hi_half are output arrays
14468 that parallel "operands". */
14469
14470 void
14471 split_double_mode (enum machine_mode mode, rtx operands[],
14472 int num, rtx lo_half[], rtx hi_half[])
14473 {
14474 enum machine_mode half_mode;
14475 unsigned int byte;
14476
14477 switch (mode)
14478 {
14479 case TImode:
14480 half_mode = DImode;
14481 break;
14482 case DImode:
14483 half_mode = SImode;
14484 break;
14485 default:
14486 gcc_unreachable ();
14487 }
14488
14489 byte = GET_MODE_SIZE (half_mode);
14490
14491 while (num--)
14492 {
14493 rtx op = operands[num];
14494
14495 /* simplify_subreg refuses to split volatile memory addresses,
14496 but we still have to handle them. */
14497 if (MEM_P (op))
14498 {
14499 lo_half[num] = adjust_address (op, half_mode, 0);
14500 hi_half[num] = adjust_address (op, half_mode, byte);
14501 }
14502 else
14503 {
14504 lo_half[num] = simplify_gen_subreg (half_mode, op,
14505 GET_MODE (op) == VOIDmode
14506 ? mode : GET_MODE (op), 0);
14507 hi_half[num] = simplify_gen_subreg (half_mode, op,
14508 GET_MODE (op) == VOIDmode
14509 ? mode : GET_MODE (op), byte);
14510 }
14511 }
14512 }
14513 \f
14514 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14515 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14516 is the expression of the binary operation. The output may either be
14517 emitted here, or returned to the caller, like all output_* functions.
14518
14519 There is no guarantee that the operands are the same mode, as they
14520 might be within FLOAT or FLOAT_EXTEND expressions. */
14521
14522 #ifndef SYSV386_COMPAT
14523 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14524 wants to fix the assemblers because that causes incompatibility
14525 with gcc. No-one wants to fix gcc because that causes
14526 incompatibility with assemblers... You can use the option of
14527 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14528 #define SYSV386_COMPAT 1
14529 #endif
14530
14531 const char *
14532 output_387_binary_op (rtx insn, rtx *operands)
14533 {
14534 static char buf[40];
14535 const char *p;
14536 const char *ssep;
14537 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14538
14539 #ifdef ENABLE_CHECKING
14540 /* Even if we do not want to check the inputs, this documents input
14541 constraints, which helps in understanding the following code. */
14542 if (STACK_REG_P (operands[0])
14543 && ((REG_P (operands[1])
14544 && REGNO (operands[0]) == REGNO (operands[1])
14545 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14546 || (REG_P (operands[2])
14547 && REGNO (operands[0]) == REGNO (operands[2])
14548 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14549 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14550 ; /* ok */
14551 else
14552 gcc_assert (is_sse);
14553 #endif
14554
14555 switch (GET_CODE (operands[3]))
14556 {
14557 case PLUS:
14558 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14559 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14560 p = "fiadd";
14561 else
14562 p = "fadd";
14563 ssep = "vadd";
14564 break;
14565
14566 case MINUS:
14567 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14568 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14569 p = "fisub";
14570 else
14571 p = "fsub";
14572 ssep = "vsub";
14573 break;
14574
14575 case MULT:
14576 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14577 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14578 p = "fimul";
14579 else
14580 p = "fmul";
14581 ssep = "vmul";
14582 break;
14583
14584 case DIV:
14585 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14586 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14587 p = "fidiv";
14588 else
14589 p = "fdiv";
14590 ssep = "vdiv";
14591 break;
14592
14593 default:
14594 gcc_unreachable ();
14595 }
14596
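/* SSE/AVX scalar operation: AVX takes the three-operand form, plain SSE
   the two-operand destructive form.  */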
14597 if (is_sse)
14598 {
14599 if (TARGET_AVX)
14600 {
14601 strcpy (buf, ssep);
14602 if (GET_MODE (operands[0]) == SFmode)
14603 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14604 else
14605 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14606 }
14607 else
14608 {
14609 strcpy (buf, ssep + 1);
14610 if (GET_MODE (operands[0]) == SFmode)
14611 strcat (buf, "ss\t{%2, %0|%0, %2}");
14612 else
14613 strcat (buf, "sd\t{%2, %0|%0, %2}");
14614 }
14615 return buf;
14616 }
14617 strcpy (buf, p);
14618
14619 switch (GET_CODE (operands[3]))
14620 {
14621 case MULT:
14622 case PLUS:
14623 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14624 {
14625 rtx temp = operands[2];
14626 operands[2] = operands[1];
14627 operands[1] = temp;
14628 }
14629
14630 /* We now know operands[0] == operands[1]. */
14631
14632 if (MEM_P (operands[2]))
14633 {
14634 p = "%Z2\t%2";
14635 break;
14636 }
14637
14638 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14639 {
14640 if (STACK_TOP_P (operands[0]))
14641 /* How is it that we are storing to a dead operand[2]?
14642 Well, presumably operands[1] is dead too. We can't
14643 store the result to st(0) as st(0) gets popped on this
14644 instruction. Instead store to operands[2] (which I
14645 think has to be st(1)). st(1) will be popped later.
14646 gcc <= 2.8.1 didn't have this check and generated
14647 assembly code that the Unixware assembler rejected. */
14648 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14649 else
14650 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14651 break;
14652 }
14653
14654 if (STACK_TOP_P (operands[0]))
14655 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14656 else
14657 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14658 break;
14659
14660 case MINUS:
14661 case DIV:
14662 if (MEM_P (operands[1]))
14663 {
14664 p = "r%Z1\t%1";
14665 break;
14666 }
14667
14668 if (MEM_P (operands[2]))
14669 {
14670 p = "%Z2\t%2";
14671 break;
14672 }
14673
14674 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14675 {
14676 #if SYSV386_COMPAT
14677 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14678 derived assemblers, confusingly reverse the direction of
14679 the operation for fsub{r} and fdiv{r} when the
14680 destination register is not st(0). The Intel assembler
14681 doesn't have this brain damage. Read !SYSV386_COMPAT to
14682 figure out what the hardware really does. */
14683 if (STACK_TOP_P (operands[0]))
14684 p = "{p\t%0, %2|rp\t%2, %0}";
14685 else
14686 p = "{rp\t%2, %0|p\t%0, %2}";
14687 #else
14688 if (STACK_TOP_P (operands[0]))
14689 /* As above for fmul/fadd, we can't store to st(0). */
14690 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14691 else
14692 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14693 #endif
14694 break;
14695 }
14696
14697 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14698 {
14699 #if SYSV386_COMPAT
14700 if (STACK_TOP_P (operands[0]))
14701 p = "{rp\t%0, %1|p\t%1, %0}";
14702 else
14703 p = "{p\t%1, %0|rp\t%0, %1}";
14704 #else
14705 if (STACK_TOP_P (operands[0]))
14706 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14707 else
14708 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14709 #endif
14710 break;
14711 }
14712
14713 if (STACK_TOP_P (operands[0]))
14714 {
14715 if (STACK_TOP_P (operands[1]))
14716 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14717 else
14718 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14719 break;
14720 }
14721 else if (STACK_TOP_P (operands[1]))
14722 {
14723 #if SYSV386_COMPAT
14724 p = "{\t%1, %0|r\t%0, %1}";
14725 #else
14726 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14727 #endif
14728 }
14729 else
14730 {
14731 #if SYSV386_COMPAT
14732 p = "{r\t%2, %0|\t%0, %2}";
14733 #else
14734 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14735 #endif
14736 }
14737 break;
14738
14739 default:
14740 gcc_unreachable ();
14741 }
14742
14743 strcat (buf, p);
14744 return buf;
14745 }
14746
14747 /* Return needed mode for entity in optimize_mode_switching pass. */
14748
14749 int
14750 ix86_mode_needed (int entity, rtx insn)
14751 {
14752 enum attr_i387_cw mode;
14753
14754 /* The mode UNINITIALIZED is used to store the control word after a
14755 function call or ASM pattern. The mode ANY specifies that the function
14756 has no requirements on the control word and makes no changes to the
14757 bits we are interested in. */
14758
14759 if (CALL_P (insn)
14760 || (NONJUMP_INSN_P (insn)
14761 && (asm_noperands (PATTERN (insn)) >= 0
14762 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14763 return I387_CW_UNINITIALIZED;
14764
14765 if (recog_memoized (insn) < 0)
14766 return I387_CW_ANY;
14767
14768 mode = get_attr_i387_cw (insn);
14769
14770 switch (entity)
14771 {
14772 case I387_TRUNC:
14773 if (mode == I387_CW_TRUNC)
14774 return mode;
14775 break;
14776
14777 case I387_FLOOR:
14778 if (mode == I387_CW_FLOOR)
14779 return mode;
14780 break;
14781
14782 case I387_CEIL:
14783 if (mode == I387_CW_CEIL)
14784 return mode;
14785 break;
14786
14787 case I387_MASK_PM:
14788 if (mode == I387_CW_MASK_PM)
14789 return mode;
14790 break;
14791
14792 default:
14793 gcc_unreachable ();
14794 }
14795
14796 return I387_CW_ANY;
14797 }
14798
14799 /* Output code to initialize control word copies used by trunc?f?i and
14800 rounding patterns. CURRENT_MODE is set to current control word,
14801 while NEW_MODE is set to new control word. */
14802
14803 void
14804 emit_i387_cw_initialization (int mode)
14805 {
14806 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14807 rtx new_mode;
14808
14809 enum ix86_stack_slot slot;
14810
14811 rtx reg = gen_reg_rtx (HImode);
14812
14813 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14814 emit_move_insn (reg, copy_rtx (stored_mode));
14815
14816 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14817 || optimize_function_for_size_p (cfun))
14818 {
14819 switch (mode)
14820 {
14821 case I387_CW_TRUNC:
14822 /* round toward zero (truncate) */
14823 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14824 slot = SLOT_CW_TRUNC;
14825 break;
14826
14827 case I387_CW_FLOOR:
14828 /* round down toward -oo */
14829 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14830 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14831 slot = SLOT_CW_FLOOR;
14832 break;
14833
14834 case I387_CW_CEIL:
14835 /* round up toward +oo */
14836 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14837 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14838 slot = SLOT_CW_CEIL;
14839 break;
14840
14841 case I387_CW_MASK_PM:
14842 /* mask precision exception for nearbyint() */
14843 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14844 slot = SLOT_CW_MASK_PM;
14845 break;
14846
14847 default:
14848 gcc_unreachable ();
14849 }
14850 }
14851 else
14852 {
14853 switch (mode)
14854 {
14855 case I387_CW_TRUNC:
14856 /* round toward zero (truncate) */
14857 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14858 slot = SLOT_CW_TRUNC;
14859 break;
14860
14861 case I387_CW_FLOOR:
14862 /* round down toward -oo */
14863 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14864 slot = SLOT_CW_FLOOR;
14865 break;
14866
14867 case I387_CW_CEIL:
14868 /* round up toward +oo */
14869 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14870 slot = SLOT_CW_CEIL;
14871 break;
14872
14873 case I387_CW_MASK_PM:
14874 /* mask precision exception for nearbyint() */
14875 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14876 slot = SLOT_CW_MASK_PM;
14877 break;
14878
14879 default:
14880 gcc_unreachable ();
14881 }
14882 }
14883
14884 gcc_assert (slot < MAX_386_STACK_LOCALS);
14885
14886 new_mode = assign_386_stack_local (HImode, slot);
14887 emit_move_insn (new_mode, reg);
14888 }
14889
14890 /* Output code for INSN to convert a float to a signed int. OPERANDS
14891 are the insn operands. The output may be [HSD]Imode and the input
14892 operand may be [SDX]Fmode. */
14893
14894 const char *
14895 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
14896 {
14897 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14898 int dimode_p = GET_MODE (operands[0]) == DImode;
14899 int round_mode = get_attr_i387_cw (insn);
14900
14901 /* Jump through a hoop or two for DImode, since the hardware has no
14902 non-popping instruction. We used to do this a different way, but
14903 that was somewhat fragile and broke with post-reload splitters. */
14904 if ((dimode_p || fisttp) && !stack_top_dies)
14905 output_asm_insn ("fld\t%y1", operands);
14906
14907 gcc_assert (STACK_TOP_P (operands[1]));
14908 gcc_assert (MEM_P (operands[0]));
14909 gcc_assert (GET_MODE (operands[1]) != TFmode);
14910
14911 if (fisttp)
14912 output_asm_insn ("fisttp%Z0\t%0", operands);
14913 else
14914 {
14915 if (round_mode != I387_CW_ANY)
14916 output_asm_insn ("fldcw\t%3", operands);
14917 if (stack_top_dies || dimode_p)
14918 output_asm_insn ("fistp%Z0\t%0", operands);
14919 else
14920 output_asm_insn ("fist%Z0\t%0", operands);
14921 if (round_mode != I387_CW_ANY)
14922 output_asm_insn ("fldcw\t%2", operands);
14923 }
14924
14925 return "";
14926 }
14927
14928 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14929 have the values zero or one, indicates the ffreep insn's operand
14930 from the OPERANDS array. */
14931
14932 static const char *
14933 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14934 {
14935 if (TARGET_USE_FFREEP)
14936 #ifdef HAVE_AS_IX86_FFREEP
14937 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14938 #else
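/* The assembler lacks the ffreep mnemonic; emit the raw encoding
   (0xdf 0xc0+i for ffreep %st(i)) as data instead.  */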
14939 {
14940 static char retval[32];
14941 int regno = REGNO (operands[opno]);
14942
14943 gcc_assert (FP_REGNO_P (regno));
14944
14945 regno -= FIRST_STACK_REG;
14946
14947 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14948 return retval;
14949 }
14950 #endif
14951
14952 return opno ? "fstp\t%y1" : "fstp\t%y0";
14953 }
14954
14955
14956 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14957 should be used. UNORDERED_P is true when fucom should be used. */
14958
14959 const char *
14960 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
14961 {
14962 int stack_top_dies;
14963 rtx cmp_op0, cmp_op1;
14964 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14965
14966 if (eflags_p)
14967 {
14968 cmp_op0 = operands[0];
14969 cmp_op1 = operands[1];
14970 }
14971 else
14972 {
14973 cmp_op0 = operands[1];
14974 cmp_op1 = operands[2];
14975 }
14976
14977 if (is_sse)
14978 {
14979 if (GET_MODE (operands[0]) == SFmode)
14980 if (unordered_p)
14981 return "%vucomiss\t{%1, %0|%0, %1}";
14982 else
14983 return "%vcomiss\t{%1, %0|%0, %1}";
14984 else
14985 if (unordered_p)
14986 return "%vucomisd\t{%1, %0|%0, %1}";
14987 else
14988 return "%vcomisd\t{%1, %0|%0, %1}";
14989 }
14990
14991 gcc_assert (STACK_TOP_P (cmp_op0));
14992
14993 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14994
14995 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14996 {
14997 if (stack_top_dies)
14998 {
14999 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15000 return output_387_ffreep (operands, 1);
15001 }
15002 else
15003 return "ftst\n\tfnstsw\t%0";
15004 }
15005
15006 if (STACK_REG_P (cmp_op1)
15007 && stack_top_dies
15008 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15009 && REGNO (cmp_op1) != FIRST_STACK_REG)
15010 {
15011 /* If the top of the 387 stack dies, and the other operand
15012 is also a stack register that dies, then this must be a
15013 `fcompp' float compare. */
15014
15015 if (eflags_p)
15016 {
15017 /* There is no double popping fcomi variant. Fortunately,
15018 eflags is immune from the fstp's cc clobbering. */
15019 if (unordered_p)
15020 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15021 else
15022 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15023 return output_387_ffreep (operands, 0);
15024 }
15025 else
15026 {
15027 if (unordered_p)
15028 return "fucompp\n\tfnstsw\t%0";
15029 else
15030 return "fcompp\n\tfnstsw\t%0";
15031 }
15032 }
15033 else
15034 {
15035 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15036
15037 static const char * const alt[16] =
15038 {
15039 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15040 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15041 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15042 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15043
15044 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15045 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15046 NULL,
15047 NULL,
15048
15049 "fcomi\t{%y1, %0|%0, %y1}",
15050 "fcomip\t{%y1, %0|%0, %y1}",
15051 "fucomi\t{%y1, %0|%0, %y1}",
15052 "fucomip\t{%y1, %0|%0, %y1}",
15053
15054 NULL,
15055 NULL,
15056 NULL,
15057 NULL
15058 };
15059
15060 int mask;
15061 const char *ret;
15062
15063 mask = eflags_p << 3;
15064 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15065 mask |= unordered_p << 1;
15066 mask |= stack_top_dies;
15067
15068 gcc_assert (mask < 16);
15069 ret = alt[mask];
15070 gcc_assert (ret);
15071
15072 return ret;
15073 }
15074 }
15075
15076 void
15077 ix86_output_addr_vec_elt (FILE *file, int value)
15078 {
15079 const char *directive = ASM_LONG;
15080
15081 #ifdef ASM_QUAD
15082 if (TARGET_LP64)
15083 directive = ASM_QUAD;
15084 #else
15085 gcc_assert (!TARGET_64BIT);
15086 #endif
15087
15088 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15089 }
15090
15091 void
15092 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15093 {
15094 const char *directive = ASM_LONG;
15095
15096 #ifdef ASM_QUAD
15097 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15098 directive = ASM_QUAD;
15099 #else
15100 gcc_assert (!TARGET_64BIT);
15101 #endif
15102 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15103 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15104 fprintf (file, "%s%s%d-%s%d\n",
15105 directive, LPREFIX, value, LPREFIX, rel);
15106 else if (HAVE_AS_GOTOFF_IN_DATA)
15107 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15108 #if TARGET_MACHO
15109 else if (TARGET_MACHO)
15110 {
15111 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15112 machopic_output_function_base_name (file);
15113 putc ('\n', file);
15114 }
15115 #endif
15116 else
15117 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15118 GOT_SYMBOL_NAME, LPREFIX, value);
15119 }
15120 \f
15121 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15122 for the target. */
15123
15124 void
15125 ix86_expand_clear (rtx dest)
15126 {
15127 rtx tmp;
15128
15129 /* We play register width games, which are only valid after reload. */
15130 gcc_assert (reload_completed);
15131
15132 /* Avoid HImode and its attendant prefix byte. */
15133 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15134 dest = gen_rtx_REG (SImode, REGNO (dest));
15135 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15136
15137 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15138 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15139 {
15140 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15141 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
15142 }
15143
15144 emit_insn (tmp);
15145 }
15146
15147 /* X is an unchanging MEM. If it is a constant pool reference, return
15148 the constant pool rtx, else NULL. */
15149
15150 rtx
15151 maybe_get_pool_constant (rtx x)
15152 {
15153 x = ix86_delegitimize_address (XEXP (x, 0));
15154
15155 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15156 return get_pool_constant (x);
15157
15158 return NULL_RTX;
15159 }
15160
15161 void
15162 ix86_expand_move (enum machine_mode mode, rtx operands[])
15163 {
15164 rtx op0, op1;
15165 enum tls_model model;
15166
15167 op0 = operands[0];
15168 op1 = operands[1];
15169
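/* Legitimize TLS and dllimport symbol references, with or without an
   added constant, before the move is emitted.  */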
15170 if (GET_CODE (op1) == SYMBOL_REF)
15171 {
15172 model = SYMBOL_REF_TLS_MODEL (op1);
15173 if (model)
15174 {
15175 op1 = legitimize_tls_address (op1, model, true);
15176 op1 = force_operand (op1, op0);
15177 if (op1 == op0)
15178 return;
15179 if (GET_MODE (op1) != mode)
15180 op1 = convert_to_mode (mode, op1, 1);
15181 }
15182 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15183 && SYMBOL_REF_DLLIMPORT_P (op1))
15184 op1 = legitimize_dllimport_symbol (op1, false);
15185 }
15186 else if (GET_CODE (op1) == CONST
15187 && GET_CODE (XEXP (op1, 0)) == PLUS
15188 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15189 {
15190 rtx addend = XEXP (XEXP (op1, 0), 1);
15191 rtx symbol = XEXP (XEXP (op1, 0), 0);
15192 rtx tmp = NULL;
15193
15194 model = SYMBOL_REF_TLS_MODEL (symbol);
15195 if (model)
15196 tmp = legitimize_tls_address (symbol, model, true);
15197 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15198 && SYMBOL_REF_DLLIMPORT_P (symbol))
15199 tmp = legitimize_dllimport_symbol (symbol, true);
15200
15201 if (tmp)
15202 {
15203 tmp = force_operand (tmp, NULL);
15204 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15205 op0, 1, OPTAB_DIRECT);
15206 if (tmp == op0)
15207 return;
15208 if (GET_MODE (tmp) != mode)
15209 op1 = convert_to_mode (mode, tmp, 1);
15210 }
15211 }
15212
15213 if ((flag_pic || MACHOPIC_INDIRECT)
15214 && symbolic_operand (op1, mode))
15215 {
15216 if (TARGET_MACHO && !TARGET_64BIT)
15217 {
15218 #if TARGET_MACHO
15219 /* dynamic-no-pic */
15220 if (MACHOPIC_INDIRECT)
15221 {
15222 rtx temp = ((reload_in_progress
15223 || ((op0 && REG_P (op0))
15224 && mode == Pmode))
15225 ? op0 : gen_reg_rtx (Pmode));
15226 op1 = machopic_indirect_data_reference (op1, temp);
15227 if (MACHOPIC_PURE)
15228 op1 = machopic_legitimize_pic_address (op1, mode,
15229 temp == op1 ? 0 : temp);
15230 }
15231 if (op0 != op1 && GET_CODE (op0) != MEM)
15232 {
15233 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15234 emit_insn (insn);
15235 return;
15236 }
15237 if (GET_CODE (op0) == MEM)
15238 op1 = force_reg (Pmode, op1);
15239 else
15240 {
15241 rtx temp = op0;
15242 if (GET_CODE (temp) != REG)
15243 temp = gen_reg_rtx (Pmode);
15244 temp = legitimize_pic_address (op1, temp);
15245 if (temp == op0)
15246 return;
15247 op1 = temp;
15248 }
15249 /* dynamic-no-pic */
15250 #endif
15251 }
15252 else
15253 {
15254 if (MEM_P (op0))
15255 op1 = force_reg (mode, op1);
15256 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15257 {
15258 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15259 op1 = legitimize_pic_address (op1, reg);
15260 if (op0 == op1)
15261 return;
15262 if (GET_MODE (op1) != mode)
15263 op1 = convert_to_mode (mode, op1, 1);
15264 }
15265 }
15266 }
15267 else
15268 {
15269 if (MEM_P (op0)
15270 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15271 || !push_operand (op0, mode))
15272 && MEM_P (op1))
15273 op1 = force_reg (mode, op1);
15274
15275 if (push_operand (op0, mode)
15276 && ! general_no_elim_operand (op1, mode))
15277 op1 = copy_to_mode_reg (mode, op1);
15278
15279 /* Force large constants in 64bit compilation into register
15280 to get them CSEed. */
15281 if (can_create_pseudo_p ()
15282 && (mode == DImode) && TARGET_64BIT
15283 && immediate_operand (op1, mode)
15284 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15285 && !register_operand (op0, mode)
15286 && optimize)
15287 op1 = copy_to_mode_reg (mode, op1);
15288
15289 if (can_create_pseudo_p ()
15290 && FLOAT_MODE_P (mode)
15291 && GET_CODE (op1) == CONST_DOUBLE)
15292 {
15293 /* If we are loading a floating point constant to a register,
15294 force the value to memory now, since we'll get better code
15295 out the back end. */
15296
15297 op1 = validize_mem (force_const_mem (mode, op1));
15298 if (!register_operand (op0, mode))
15299 {
15300 rtx temp = gen_reg_rtx (mode);
15301 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15302 emit_move_insn (op0, temp);
15303 return;
15304 }
15305 }
15306 }
15307
15308 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15309 }
15310
15311 void
15312 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15313 {
15314 rtx op0 = operands[0], op1 = operands[1];
15315 unsigned int align = GET_MODE_ALIGNMENT (mode);
15316
15317 /* Force constants other than zero into memory. We do not know how
15318 the instructions used to build constants modify the upper 64 bits
15319 of the register; once we have that information we may be able
15320 to handle some of them more efficiently. */
15321 if (can_create_pseudo_p ()
15322 && register_operand (op0, mode)
15323 && (CONSTANT_P (op1)
15324 || (GET_CODE (op1) == SUBREG
15325 && CONSTANT_P (SUBREG_REG (op1))))
15326 && !standard_sse_constant_p (op1))
15327 op1 = validize_mem (force_const_mem (mode, op1));
15328
15329 /* We need to check memory alignment for SSE mode since attribute
15330 can make operands unaligned. */
15331 if (can_create_pseudo_p ()
15332 && SSE_REG_MODE_P (mode)
15333 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15334 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15335 {
15336 rtx tmp[2];
15337
15338 /* ix86_expand_vector_move_misalign() does not like constants ... */
15339 if (CONSTANT_P (op1)
15340 || (GET_CODE (op1) == SUBREG
15341 && CONSTANT_P (SUBREG_REG (op1))))
15342 op1 = validize_mem (force_const_mem (mode, op1));
15343
15344 /* ... nor both arguments in memory. */
15345 if (!register_operand (op0, mode)
15346 && !register_operand (op1, mode))
15347 op1 = force_reg (mode, op1);
15348
15349 tmp[0] = op0; tmp[1] = op1;
15350 ix86_expand_vector_move_misalign (mode, tmp);
15351 return;
15352 }
15353
15354 /* Make operand1 a register if it isn't already. */
15355 if (can_create_pseudo_p ()
15356 && !register_operand (op0, mode)
15357 && !register_operand (op1, mode))
15358 {
15359 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15360 return;
15361 }
15362
15363 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15364 }
15365
15366 /* Split 32-byte AVX unaligned load and store if needed. */
15367
15368 static void
15369 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15370 {
15371 rtx m;
15372 rtx (*extract) (rtx, rtx, rtx);
15373 rtx (*move_unaligned) (rtx, rtx);
15374 enum machine_mode mode;
15375
15376 switch (GET_MODE (op0))
15377 {
15378 default:
15379 gcc_unreachable ();
15380 case V32QImode:
15381 extract = gen_avx_vextractf128v32qi;
15382 move_unaligned = gen_avx_movdqu256;
15383 mode = V16QImode;
15384 break;
15385 case V8SFmode:
15386 extract = gen_avx_vextractf128v8sf;
15387 move_unaligned = gen_avx_movups256;
15388 mode = V4SFmode;
15389 break;
15390 case V4DFmode:
15391 extract = gen_avx_vextractf128v4df;
15392 move_unaligned = gen_avx_movupd256;
15393 mode = V2DFmode;
15394 break;
15395 }
15396
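/* For an unaligned 32-byte load, load the two 16-byte halves separately
   and concatenate them; for a store, extract and store each half.  */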
15397 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15398 {
15399 rtx r = gen_reg_rtx (mode);
15400 m = adjust_address (op1, mode, 0);
15401 emit_move_insn (r, m);
15402 m = adjust_address (op1, mode, 16);
15403 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15404 emit_move_insn (op0, r);
15405 }
15406 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15407 {
15408 m = adjust_address (op0, mode, 0);
15409 emit_insn (extract (m, op1, const0_rtx));
15410 m = adjust_address (op0, mode, 16);
15411 emit_insn (extract (m, op1, const1_rtx));
15412 }
15413 else
15414 emit_insn (move_unaligned (op0, op1));
15415 }
15416
15417 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15418 straight to ix86_expand_vector_move. */
15419 /* Code generation for scalar reg-reg moves of single and double precision data:
15420 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15421 movaps reg, reg
15422 else
15423 movss reg, reg
15424 if (x86_sse_partial_reg_dependency == true)
15425 movapd reg, reg
15426 else
15427 movsd reg, reg
15428
15429 Code generation for scalar loads of double precision data:
15430 if (x86_sse_split_regs == true)
15431 movlpd mem, reg (gas syntax)
15432 else
15433 movsd mem, reg
15434
15435 Code generation for unaligned packed loads of single precision data
15436 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15437 if (x86_sse_unaligned_move_optimal)
15438 movups mem, reg
15439
15440 if (x86_sse_partial_reg_dependency == true)
15441 {
15442 xorps reg, reg
15443 movlps mem, reg
15444 movhps mem+8, reg
15445 }
15446 else
15447 {
15448 movlps mem, reg
15449 movhps mem+8, reg
15450 }
15451
15452 Code generation for unaligned packed loads of double precision data
15453 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15454 if (x86_sse_unaligned_move_optimal)
15455 movupd mem, reg
15456
15457 if (x86_sse_split_regs == true)
15458 {
15459 movlpd mem, reg
15460 movhpd mem+8, reg
15461 }
15462 else
15463 {
15464 movsd mem, reg
15465 movhpd mem+8, reg
15466 }
15467 */
15468
15469 void
15470 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15471 {
15472 rtx op0, op1, m;
15473
15474 op0 = operands[0];
15475 op1 = operands[1];
15476
15477 if (TARGET_AVX)
15478 {
15479 switch (GET_MODE_CLASS (mode))
15480 {
15481 case MODE_VECTOR_INT:
15482 case MODE_INT:
15483 switch (GET_MODE_SIZE (mode))
15484 {
15485 case 16:
15486 /* If we're optimizing for size, movups is the smallest. */
15487 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15488 {
15489 op0 = gen_lowpart (V4SFmode, op0);
15490 op1 = gen_lowpart (V4SFmode, op1);
15491 emit_insn (gen_sse_movups (op0, op1));
15492 return;
15493 }
15494 op0 = gen_lowpart (V16QImode, op0);
15495 op1 = gen_lowpart (V16QImode, op1);
15496 emit_insn (gen_sse2_movdqu (op0, op1));
15497 break;
15498 case 32:
15499 op0 = gen_lowpart (V32QImode, op0);
15500 op1 = gen_lowpart (V32QImode, op1);
15501 ix86_avx256_split_vector_move_misalign (op0, op1);
15502 break;
15503 default:
15504 gcc_unreachable ();
15505 }
15506 break;
15507 case MODE_VECTOR_FLOAT:
15508 op0 = gen_lowpart (mode, op0);
15509 op1 = gen_lowpart (mode, op1);
15510
15511 switch (mode)
15512 {
15513 case V4SFmode:
15514 emit_insn (gen_sse_movups (op0, op1));
15515 break;
15516 case V8SFmode:
15517 ix86_avx256_split_vector_move_misalign (op0, op1);
15518 break;
15519 case V2DFmode:
15520 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15521 {
15522 op0 = gen_lowpart (V4SFmode, op0);
15523 op1 = gen_lowpart (V4SFmode, op1);
15524 emit_insn (gen_sse_movups (op0, op1));
15525 return;
15526 }
15527 emit_insn (gen_sse2_movupd (op0, op1));
15528 break;
15529 case V4DFmode:
15530 ix86_avx256_split_vector_move_misalign (op0, op1);
15531 break;
15532 default:
15533 gcc_unreachable ();
15534 }
15535 break;
15536
15537 default:
15538 gcc_unreachable ();
15539 }
15540
15541 return;
15542 }
15543
15544 if (MEM_P (op1))
15545 {
15546 /* If we're optimizing for size, movups is the smallest. */
15547 if (optimize_insn_for_size_p ()
15548 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15549 {
15550 op0 = gen_lowpart (V4SFmode, op0);
15551 op1 = gen_lowpart (V4SFmode, op1);
15552 emit_insn (gen_sse_movups (op0, op1));
15553 return;
15554 }
15555
15556 /* ??? If we have typed data, then it would appear that using
15557 movdqu is the only way to get unaligned data loaded with
15558 integer type. */
15559 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15560 {
15561 op0 = gen_lowpart (V16QImode, op0);
15562 op1 = gen_lowpart (V16QImode, op1);
15563 emit_insn (gen_sse2_movdqu (op0, op1));
15564 return;
15565 }
15566
15567 if (TARGET_SSE2 && mode == V2DFmode)
15568 {
15569 rtx zero;
15570
15571 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15572 {
15573 op0 = gen_lowpart (V2DFmode, op0);
15574 op1 = gen_lowpart (V2DFmode, op1);
15575 emit_insn (gen_sse2_movupd (op0, op1));
15576 return;
15577 }
15578
15579 /* When SSE registers are split into halves, we can avoid
15580 writing to the top half twice. */
15581 if (TARGET_SSE_SPLIT_REGS)
15582 {
15583 emit_clobber (op0);
15584 zero = op0;
15585 }
15586 else
15587 {
15588 /* ??? Not sure about the best option for the Intel chips.
15589 The following would seem to satisfy; the register is
15590 entirely cleared, breaking the dependency chain. We
15591 then store to the upper half, with a dependency depth
15592 of one. A rumor has it that Intel recommends two movsd
15593 followed by an unpacklpd, but this is unconfirmed. And
15594 given that the dependency depth of the unpacklpd would
15595 still be one, I'm not sure why this would be better. */
15596 zero = CONST0_RTX (V2DFmode);
15597 }
15598
15599 m = adjust_address (op1, DFmode, 0);
15600 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15601 m = adjust_address (op1, DFmode, 8);
15602 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15603 }
15604 else
15605 {
15606 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15607 {
15608 op0 = gen_lowpart (V4SFmode, op0);
15609 op1 = gen_lowpart (V4SFmode, op1);
15610 emit_insn (gen_sse_movups (op0, op1));
15611 return;
15612 }
15613
15614 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15615 emit_move_insn (op0, CONST0_RTX (mode));
15616 else
15617 emit_clobber (op0);
15618
15619 if (mode != V4SFmode)
15620 op0 = gen_lowpart (V4SFmode, op0);
15621 m = adjust_address (op1, V2SFmode, 0);
15622 emit_insn (gen_sse_loadlps (op0, op0, m));
15623 m = adjust_address (op1, V2SFmode, 8);
15624 emit_insn (gen_sse_loadhps (op0, op0, m));
15625 }
15626 }
15627 else if (MEM_P (op0))
15628 {
15629 /* If we're optimizing for size, movups is the smallest. */
15630 if (optimize_insn_for_size_p ()
15631 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15632 {
15633 op0 = gen_lowpart (V4SFmode, op0);
15634 op1 = gen_lowpart (V4SFmode, op1);
15635 emit_insn (gen_sse_movups (op0, op1));
15636 return;
15637 }
15638
15639 /* ??? Similar to above, only less clear because of quote
15640 typeless stores unquote. */
15641 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15642 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15643 {
15644 op0 = gen_lowpart (V16QImode, op0);
15645 op1 = gen_lowpart (V16QImode, op1);
15646 emit_insn (gen_sse2_movdqu (op0, op1));
15647 return;
15648 }
15649
15650 if (TARGET_SSE2 && mode == V2DFmode)
15651 {
15652 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15653 {
15654 op0 = gen_lowpart (V2DFmode, op0);
15655 op1 = gen_lowpart (V2DFmode, op1);
15656 emit_insn (gen_sse2_movupd (op0, op1));
15657 }
15658 else
15659 {
15660 m = adjust_address (op0, DFmode, 0);
15661 emit_insn (gen_sse2_storelpd (m, op1));
15662 m = adjust_address (op0, DFmode, 8);
15663 emit_insn (gen_sse2_storehpd (m, op1));
15664 }
15665 }
15666 else
15667 {
15668 if (mode != V4SFmode)
15669 op1 = gen_lowpart (V4SFmode, op1);
15670
15671 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15672 {
15673 op0 = gen_lowpart (V4SFmode, op0);
15674 emit_insn (gen_sse_movups (op0, op1));
15675 }
15676 else
15677 {
15678 m = adjust_address (op0, V2SFmode, 0);
15679 emit_insn (gen_sse_storelps (m, op1));
15680 m = adjust_address (op0, V2SFmode, 8);
15681 emit_insn (gen_sse_storehps (m, op1));
15682 }
15683 }
15684 }
15685 else
15686 gcc_unreachable ();
15687 }
15688
15689 /* Expand a push in MODE. This is some mode for which we do not support
15690 proper push instructions, at least from the registers that we expect
15691 the value to live in. */
15692
15693 void
15694 ix86_expand_push (enum machine_mode mode, rtx x)
15695 {
15696 rtx tmp;
15697
15698 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15699 GEN_INT (-GET_MODE_SIZE (mode)),
15700 stack_pointer_rtx, 1, OPTAB_DIRECT);
15701 if (tmp != stack_pointer_rtx)
15702 emit_move_insn (stack_pointer_rtx, tmp);
15703
15704 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15705
15706 /* When we push an operand onto the stack, it has to be aligned at least
15707 at the function argument boundary. However, since we don't have
15708 the argument type, we can't determine the actual argument
15709 boundary. */
15710 emit_move_insn (tmp, x);
15711 }
15712
15713 /* Helper function of ix86_fixup_binary_operands to canonicalize
15714 operand order. Returns true if the operands should be swapped. */
15715
15716 static bool
15717 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15718 rtx operands[])
15719 {
15720 rtx dst = operands[0];
15721 rtx src1 = operands[1];
15722 rtx src2 = operands[2];
15723
15724 /* If the operation is not commutative, we can't do anything. */
15725 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15726 return false;
15727
15728 /* Highest priority is that src1 should match dst. */
15729 if (rtx_equal_p (dst, src1))
15730 return false;
15731 if (rtx_equal_p (dst, src2))
15732 return true;
15733
15734 /* Next highest priority is that immediate constants come second. */
15735 if (immediate_operand (src2, mode))
15736 return false;
15737 if (immediate_operand (src1, mode))
15738 return true;
15739
15740 /* Lowest priority is that memory references should come second. */
15741 if (MEM_P (src2))
15742 return false;
15743 if (MEM_P (src1))
15744 return true;
15745
15746 return false;
15747 }
15748
15749
15750 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15751 destination to use for the operation. If different from the true
15752 destination in operands[0], a copy operation will be required. */
15753
15754 rtx
15755 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15756 rtx operands[])
15757 {
15758 rtx dst = operands[0];
15759 rtx src1 = operands[1];
15760 rtx src2 = operands[2];
15761
15762 /* Canonicalize operand order. */
15763 if (ix86_swap_binary_operands_p (code, mode, operands))
15764 {
15765 rtx temp;
15766
15767 /* It is invalid to swap operands of different modes. */
15768 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15769
15770 temp = src1;
15771 src1 = src2;
15772 src2 = temp;
15773 }
15774
15775 /* Both source operands cannot be in memory. */
15776 if (MEM_P (src1) && MEM_P (src2))
15777 {
15778 /* Optimization: Only read from memory once. */
15779 if (rtx_equal_p (src1, src2))
15780 {
15781 src2 = force_reg (mode, src2);
15782 src1 = src2;
15783 }
15784 else
15785 src2 = force_reg (mode, src2);
15786 }
15787
15788 /* If the destination is memory, and we do not have matching source
15789 operands, do things in registers. */
15790 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15791 dst = gen_reg_rtx (mode);
15792
15793 /* Source 1 cannot be a constant. */
15794 if (CONSTANT_P (src1))
15795 src1 = force_reg (mode, src1);
15796
15797 /* Source 1 cannot be a non-matching memory. */
15798 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15799 src1 = force_reg (mode, src1);
15800
15801 /* Improve address combine. */
15802 if (code == PLUS
15803 && GET_MODE_CLASS (mode) == MODE_INT
15804 && MEM_P (src2))
15805 src2 = force_reg (mode, src2);
15806
15807 operands[1] = src1;
15808 operands[2] = src2;
15809 return dst;
15810 }
15811
15812 /* Similarly, but assume that the destination has already been
15813 set up properly. */
15814
15815 void
15816 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15817 enum machine_mode mode, rtx operands[])
15818 {
15819 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15820 gcc_assert (dst == operands[0]);
15821 }
15822
15823 /* Attempt to expand a binary operator. Make the expansion closer to the
15824 actual machine than just general_operand, which will allow 3 separate
15825 memory references (one output, two input) in a single insn. */
15826
15827 void
15828 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15829 rtx operands[])
15830 {
15831 rtx src1, src2, dst, op, clob;
15832
15833 dst = ix86_fixup_binary_operands (code, mode, operands);
15834 src1 = operands[1];
15835 src2 = operands[2];
15836
15837 /* Emit the instruction. */
15838
15839 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15840 if (reload_in_progress)
15841 {
15842 /* Reload doesn't know about the flags register, and doesn't know that
15843 it doesn't want to clobber it. We can only do this with PLUS. */
15844 gcc_assert (code == PLUS);
15845 emit_insn (op);
15846 }
15847 else if (reload_completed
15848 && code == PLUS
15849 && !rtx_equal_p (dst, src1))
15850 {
15851 /* This is going to be an LEA; avoid splitting it later. */
15852 emit_insn (op);
15853 }
15854 else
15855 {
15856 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15857 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15858 }
15859
15860 /* Fix up the destination if needed. */
15861 if (dst != operands[0])
15862 emit_move_insn (operands[0], dst);
15863 }
15864
15865 /* Return TRUE or FALSE depending on whether the binary operator meets the
15866 appropriate constraints. */
15867
15868 bool
15869 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15870 rtx operands[3])
15871 {
15872 rtx dst = operands[0];
15873 rtx src1 = operands[1];
15874 rtx src2 = operands[2];
15875
15876 /* Both source operands cannot be in memory. */
15877 if (MEM_P (src1) && MEM_P (src2))
15878 return false;
15879
15880 /* Canonicalize operand order for commutative operators. */
15881 if (ix86_swap_binary_operands_p (code, mode, operands))
15882 {
15883 rtx temp = src1;
15884 src1 = src2;
15885 src2 = temp;
15886 }
15887
15888 /* If the destination is memory, we must have a matching source operand. */
15889 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15890 return false;
15891
15892 /* Source 1 cannot be a constant. */
15893 if (CONSTANT_P (src1))
15894 return false;
15895
15896 /* Source 1 cannot be a non-matching memory. */
15897 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15898 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15899 return (code == AND
15900 && (mode == HImode
15901 || mode == SImode
15902 || (TARGET_64BIT && mode == DImode))
15903 && satisfies_constraint_L (src2));
15904
15905 return true;
15906 }
15907
15908 /* Attempt to expand a unary operator. Make the expansion closer to the
15909 actual machine than just general_operand, which will allow 2 separate
15910 memory references (one output, one input) in a single insn. */
15911
15912 void
15913 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15914 rtx operands[])
15915 {
15916 int matching_memory;
15917 rtx src, dst, op, clob;
15918
15919 dst = operands[0];
15920 src = operands[1];
15921
15922 /* If the destination is memory, and we do not have matching source
15923 operands, do things in registers. */
15924 matching_memory = 0;
15925 if (MEM_P (dst))
15926 {
15927 if (rtx_equal_p (dst, src))
15928 matching_memory = 1;
15929 else
15930 dst = gen_reg_rtx (mode);
15931 }
15932
15933 /* When source operand is memory, destination must match. */
15934 if (MEM_P (src) && !matching_memory)
15935 src = force_reg (mode, src);
15936
15937 /* Emit the instruction. */
15938
15939 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15940 if (reload_in_progress || code == NOT)
15941 {
15942 /* Reload doesn't know about the flags register, and doesn't know that
15943 it doesn't want to clobber it. */
15944 gcc_assert (code == NOT);
15945 emit_insn (op);
15946 }
15947 else
15948 {
15949 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15950 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15951 }
15952
15953 /* Fix up the destination if needed. */
15954 if (dst != operands[0])
15955 emit_move_insn (operands[0], dst);
15956 }
15957
15958 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15959 divisor are within the range [0-255]. */
15960
15961 void
15962 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15963 bool signed_p)
15964 {
15965 rtx end_label, qimode_label;
15966 rtx insn, div, mod;
15967 rtx scratch, tmp0, tmp1, tmp2;
15968 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15969 rtx (*gen_zero_extend) (rtx, rtx);
15970 rtx (*gen_test_ccno_1) (rtx, rtx);
15971
15972 switch (mode)
15973 {
15974 case SImode:
15975 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15976 gen_test_ccno_1 = gen_testsi_ccno_1;
15977 gen_zero_extend = gen_zero_extendqisi2;
15978 break;
15979 case DImode:
15980 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15981 gen_test_ccno_1 = gen_testdi_ccno_1;
15982 gen_zero_extend = gen_zero_extendqidi2;
15983 break;
15984 default:
15985 gcc_unreachable ();
15986 }
15987
15988 end_label = gen_label_rtx ();
15989 qimode_label = gen_label_rtx ();
15990
15991 scratch = gen_reg_rtx (mode);
15992
15993 /* Use 8bit unsigned divmod if dividend and divisor are within
15994 the range [0-255]. */
15995 emit_move_insn (scratch, operands[2]);
15996 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15997 scratch, 1, OPTAB_DIRECT);
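/* If (dividend | divisor) has no bits set above the low byte, both
   values fit in 8 bits.  */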
15998 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15999 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16000 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16001 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16002 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16003 pc_rtx);
16004 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16005 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16006 JUMP_LABEL (insn) = qimode_label;
16007
16008 /* Generate original signed/unsigned divmod. */
16009 div = gen_divmod4_1 (operands[0], operands[1],
16010 operands[2], operands[3]);
16011 emit_insn (div);
16012
16013 /* Branch to the end. */
16014 emit_jump_insn (gen_jump (end_label));
16015 emit_barrier ();
16016
16017 /* Generate 8bit unsigned divide. */
16018 emit_label (qimode_label);
16019 /* Don't use operands[0] for result of 8bit divide since not all
16020 registers support QImode ZERO_EXTRACT. */
16021 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16022 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16023 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16024 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16025
16026 if (signed_p)
16027 {
16028 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16029 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16030 }
16031 else
16032 {
16033 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16034 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16035 }
16036
16037 /* Extract remainder from AH. */
16038 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16039 if (REG_P (operands[1]))
16040 insn = emit_move_insn (operands[1], tmp1);
16041 else
16042 {
16043 /* Need a new scratch register since the old one has result
16044 of 8bit divide. */
16045 scratch = gen_reg_rtx (mode);
16046 emit_move_insn (scratch, tmp1);
16047 insn = emit_move_insn (operands[1], scratch);
16048 }
16049 set_unique_reg_note (insn, REG_EQUAL, mod);
16050
16051 /* Zero extend quotient from AL. */
16052 tmp1 = gen_lowpart (QImode, tmp0);
16053 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16054 set_unique_reg_note (insn, REG_EQUAL, div);
16055
16056 emit_label (end_label);
16057 }
16058
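/* Distances below are measured in half-cycles, so the search threshold is
   twice LEA_MAX_STALL.  */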
16059 #define LEA_MAX_STALL (3)
16060 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16061
16062 /* Increase given DISTANCE in half-cycles according to
16063 dependencies between PREV and NEXT instructions.
16064 Add 1 half-cycle if there is no dependency and
16065 go to the next cycle if there is some dependency. */
16066
16067 static unsigned int
16068 increase_distance (rtx prev, rtx next, unsigned int distance)
16069 {
16070 df_ref *use_rec;
16071 df_ref *def_rec;
16072
16073 if (!prev || !next)
16074 return distance + (distance & 1) + 2;
16075
16076 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
16077 return distance + 1;
16078
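/* A def-use dependency between PREV and NEXT pushes NEXT into a later
   cycle; round DISTANCE up to a cycle boundary and add one full cycle.  */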
16079 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16080 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16081 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
16082 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
16083 return distance + (distance & 1) + 2;
16084
16085 return distance + 1;
16086 }
16087
16088 /* Return true if instruction INSN defines register number
16089 REGNO1 or REGNO2. */
16090
16091 static bool
16092 insn_defines_reg (unsigned int regno1, unsigned int regno2,
16093 rtx insn)
16094 {
16095 df_ref *def_rec;
16096
16097 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
16098 if (DF_REF_REG_DEF_P (*def_rec)
16099 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16100 && (regno1 == DF_REF_REGNO (*def_rec)
16101 || regno2 == DF_REF_REGNO (*def_rec)))
16102 {
16103 return true;
16104 }
16105
16106 return false;
16107 }
16108
16109 /* Return true if instruction INSN uses register number
16110 REGNO as part of an address expression. */
16111
16112 static bool
16113 insn_uses_reg_mem (unsigned int regno, rtx insn)
16114 {
16115 df_ref *use_rec;
16116
16117 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
16118 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
16119 return true;
16120
16121 return false;
16122 }
16123
16124 /* Search backward for non-agu definition of register number REGNO1
16125 or register number REGNO2 in basic block starting from instruction
16126 START up to head of basic block or instruction INSN.
16127
16128 Function puts true value into *FOUND var if definition was found
16129 and false otherwise.
16130
16131 Distance in half-cycles between START and found instruction or head
16132 of BB is added to DISTANCE and returned. */
16133
16134 static int
16135 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16136 rtx insn, int distance,
16137 rtx start, bool *found)
16138 {
16139 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16140 rtx prev = start;
16141 rtx next = NULL;
16142 enum attr_type insn_type;
16143
16144 *found = false;
16145
16146 while (prev
16147 && prev != insn
16148 && distance < LEA_SEARCH_THRESHOLD)
16149 {
16150 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16151 {
16152 distance = increase_distance (prev, next, distance);
16153 if (insn_defines_reg (regno1, regno2, prev))
16154 {
16155 insn_type = get_attr_type (prev);
16156 if (insn_type != TYPE_LEA)
16157 {
16158 *found = true;
16159 return distance;
16160 }
16161 }
16162
16163 next = prev;
16164 }
16165 if (prev == BB_HEAD (bb))
16166 break;
16167
16168 prev = PREV_INSN (prev);
16169 }
16170
16171 return distance;
16172 }
16173
16174 /* Search backward for non-agu definition of register number REGNO1
16175 or register number REGNO2 in INSN's basic block until
16176 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16177 2. Reach a neighbouring BB boundary, or
16178 3. Reach an agu definition.
16179 Returns the distance between the non-agu definition point and INSN.
16180 If no definition point, returns -1. */
16181
16182 static int
16183 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16184 rtx insn)
16185 {
16186 basic_block bb = BLOCK_FOR_INSN (insn);
16187 int distance = 0;
16188 bool found = false;
16189
16190 if (insn != BB_HEAD (bb))
16191 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16192 distance, PREV_INSN (insn),
16193 &found);
16194
16195 if (!found && distance < LEA_SEARCH_THRESHOLD)
16196 {
16197 edge e;
16198 edge_iterator ei;
16199 bool simple_loop = false;
16200
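/* Check for a self-loop, in which case the search can continue from
   the end of this same basic block.  */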
16201 FOR_EACH_EDGE (e, ei, bb->preds)
16202 if (e->src == bb)
16203 {
16204 simple_loop = true;
16205 break;
16206 }
16207
16208 if (simple_loop)
16209 distance = distance_non_agu_define_in_bb (regno1, regno2,
16210 insn, distance,
16211 BB_END (bb), &found);
16212 else
16213 {
16214 int shortest_dist = -1;
16215 bool found_in_bb = false;
16216
16217 FOR_EACH_EDGE (e, ei, bb->preds)
16218 {
16219 int bb_dist
16220 = distance_non_agu_define_in_bb (regno1, regno2,
16221 insn, distance,
16222 BB_END (e->src),
16223 &found_in_bb);
16224 if (found_in_bb)
16225 {
16226 if (shortest_dist < 0)
16227 shortest_dist = bb_dist;
16228 else if (bb_dist > 0)
16229 shortest_dist = MIN (bb_dist, shortest_dist);
16230
16231 found = true;
16232 }
16233 }
16234
16235 distance = shortest_dist;
16236 }
16237 }
16238
16239 /* get_attr_type may modify recog data. We want to make sure
16240 that recog data is valid for instruction INSN, on which
16241 distance_non_agu_define is called. INSN is unchanged here. */
16242 extract_insn_cached (insn);
16243
16244 if (!found)
16245 return -1;
16246
16247 return distance >> 1;
16248 }
16249
16250 /* Return the distance in half-cycles between INSN and the next
16251 insn that uses register number REGNO in memory address added
16252 to DISTANCE. Return -1 if REGNO is set.
16253
16254 Store true in *FOUND if a register use was found and
16255 false otherwise.
16256 Store true in *REDEFINED if a register redefinition was
16257 found and false otherwise. */
16258
16259 static int
16260 distance_agu_use_in_bb (unsigned int regno,
16261 rtx insn, int distance, rtx start,
16262 bool *found, bool *redefined)
16263 {
16264 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16265 rtx next = start;
16266 rtx prev = NULL;
16267
16268 *found = false;
16269 *redefined = false;
16270
16271 while (next
16272 && next != insn
16273 && distance < LEA_SEARCH_THRESHOLD)
16274 {
16275 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
16276 {
16277 distance = increase_distance (prev, next, distance);
16278 if (insn_uses_reg_mem (regno, next))
16279 {
16280 /* Return DISTANCE if OP0 is used in memory
16281 address in NEXT. */
16282 *found = true;
16283 return distance;
16284 }
16285
16286 if (insn_defines_reg (regno, INVALID_REGNUM, next))
16287 {
16288 /* Return -1 if OP0 is set in NEXT. */
16289 *redefined = true;
16290 return -1;
16291 }
16292
16293 prev = next;
16294 }
16295
16296 if (next == BB_END (bb))
16297 break;
16298
16299 next = NEXT_INSN (next);
16300 }
16301
16302 return distance;
16303 }
16304
16305 /* Return the distance between INSN and the next insn that uses
16306 register number REGNO0 in a memory address. Return -1 if no such
16307 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
16308
16309 static int
16310 distance_agu_use (unsigned int regno0, rtx insn)
16311 {
16312 basic_block bb = BLOCK_FOR_INSN (insn);
16313 int distance = 0;
16314 bool found = false;
16315 bool redefined = false;
16316
16317 if (insn != BB_END (bb))
16318 distance = distance_agu_use_in_bb (regno0, insn, distance,
16319 NEXT_INSN (insn),
16320 &found, &redefined);
16321
16322 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16323 {
16324 edge e;
16325 edge_iterator ei;
16326 bool simple_loop = false;
16327
16328 FOR_EACH_EDGE (e, ei, bb->succs)
16329 if (e->dest == bb)
16330 {
16331 simple_loop = true;
16332 break;
16333 }
16334
16335 if (simple_loop)
16336 distance = distance_agu_use_in_bb (regno0, insn,
16337 distance, BB_HEAD (bb),
16338 &found, &redefined);
16339 else
16340 {
16341 int shortest_dist = -1;
16342 bool found_in_bb = false;
16343 bool redefined_in_bb = false;
16344
16345 FOR_EACH_EDGE (e, ei, bb->succs)
16346 {
16347 int bb_dist
16348 = distance_agu_use_in_bb (regno0, insn,
16349 distance, BB_HEAD (e->dest),
16350 &found_in_bb, &redefined_in_bb);
16351 if (found_in_bb)
16352 {
16353 if (shortest_dist < 0)
16354 shortest_dist = bb_dist;
16355 else if (bb_dist > 0)
16356 shortest_dist = MIN (bb_dist, shortest_dist);
16357
16358 found = true;
16359 }
16360 }
16361
16362 distance = shortest_dist;
16363 }
16364 }
16365
16366 if (!found || redefined)
16367 return -1;
16368
16369 return distance >> 1;
16370 }
16371
16372 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16373 there is a choice between LEA and ADD:
16374 Negative value: ADD is preferred over LEA
16375 Zero: Neutral
16376 Positive value: LEA is preferred over ADD. */
16377 #define IX86_LEA_PRIORITY 0
16378
16379 /* Return true if using lea INSN has a performance advantage
16380 over a sequence of instructions. The instruction sequence has
16381 SPLIT_COST cycles higher latency than the lea latency. */
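/* Editor's note (an illustrative sketch, not part of the original sources):
   in the common case the test below reduces to

     dist_define + SPLIT_COST + IX86_LEA_PRIORITY >= dist_use

   where dist_define is the distance in cycles back to the last non-AGU
   producer of an input register and dist_use is the distance forward to
   the first use of the result in a memory address.  The farther away the
   producer and the closer the consumer, the more attractive lea is.  */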
16382
16383 bool
16384 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
16385 unsigned int regno2, unsigned int split_cost)
16386 {
16387 int dist_define, dist_use;
16388
16389 dist_define = distance_non_agu_define (regno1, regno2, insn);
16390 dist_use = distance_agu_use (regno0, insn);
16391
16392 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16393 {
16394 /* If there is no non-AGU operand definition, no AGU
16395 operand usage and the split cost is 0, then both the lea
16396 and non-lea variants have the same priority. Currently
16397 we prefer lea for 64-bit code and non-lea for 32-bit
16398 code. */
16399 if (dist_use < 0 && split_cost == 0)
16400 return TARGET_64BIT || IX86_LEA_PRIORITY;
16401 else
16402 return true;
16403 }
16404
16405 /* The longer the definition distance, the more preferable lea is.
16406 Here we adjust it to take into account the splitting cost and
16407 the lea priority. */
16408 dist_define += split_cost + IX86_LEA_PRIORITY;
16409
16410 /* If there is no use in a memory address then we just check
16411 that the split cost does not exceed the AGU stall. */
16412 if (dist_use < 0)
16413 return dist_define >= LEA_MAX_STALL;
16414
16415 /* If this insn has both a backward non-AGU dependence and a forward
16416 AGU dependence, the one with the shorter distance takes effect. */
16417 return dist_define >= dist_use;
16418 }
16419
16420 /* Return true if it is legal to clobber flags by INSN and
16421 false otherwise. */
16422
16423 static bool
16424 ix86_ok_to_clobber_flags (rtx insn)
16425 {
16426 basic_block bb = BLOCK_FOR_INSN (insn);
16427 df_ref *use;
16428 bitmap live;
16429
16430 while (insn)
16431 {
16432 if (NONDEBUG_INSN_P (insn))
16433 {
16434 for (use = DF_INSN_USES (insn); *use; use++)
16435 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
16436 return false;
16437
16438 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
16439 return true;
16440 }
16441
16442 if (insn == BB_END (bb))
16443 break;
16444
16445 insn = NEXT_INSN (insn);
16446 }
16447
16448 live = df_get_live_out (bb);
16449 return !REGNO_REG_SET_P (live, FLAGS_REG);
16450 }
16451
16452 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16453 move and add to avoid AGU stalls. */
16454
16455 bool
16456 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
16457 {
16458 unsigned int regno0 = true_regnum (operands[0]);
16459 unsigned int regno1 = true_regnum (operands[1]);
16460 unsigned int regno2 = true_regnum (operands[2]);
16461
16462 /* Check if we need to optimize. */
16463 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16464 return false;
16465
16466 /* Check it is correct to split here. */
16467 if (!ix86_ok_to_clobber_flags (insn))
16468 return false;
16469
16470 /* We only need to split adds with a non-destructive
16471 destination operand. */
16472 if (regno0 == regno1 || regno0 == regno2)
16473 return false;
16474 else
16475 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
16476 }
16477
16478 /* Return true if we need to split lea into a sequence of
16479 instructions to avoid AGU stalls. */
16480
16481 bool
16482 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
16483 {
16484 unsigned int regno0 = true_regnum (operands[0]);
16485 unsigned int regno1 = INVALID_REGNUM;
16486 unsigned int regno2 = INVALID_REGNUM;
16487 unsigned int split_cost = 0;
16488 struct ix86_address parts;
16489 int ok;
16490
16491 /* Check if we need to optimize. */
16492 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16493 return false;
16494
16495 /* Check it is correct to split here. */
16496 if (!ix86_ok_to_clobber_flags (insn))
16497 return false;
16498
16499 ok = ix86_decompose_address (operands[1], &parts);
16500 gcc_assert (ok);
16501
16502 /* We should not split into add if a non-legitimate PIC
16503 operand is used as the displacement. */
16504 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16505 return false;
16506
16507 if (parts.base)
16508 regno1 = true_regnum (parts.base);
16509 if (parts.index)
16510 regno2 = true_regnum (parts.index);
16511
16512 /* Compute how many cycles we will add to the execution time
16513 if we split the lea into a sequence of instructions. */
16514 if (parts.base || parts.index)
16515 {
16516 /* Have to use a mov instruction if the non-destructive
16517 destination form is used. */
16518 if (regno1 != regno0 && regno2 != regno0)
16519 split_cost += 1;
16520
16521 /* Have to add index to base if both exist. */
16522 if (parts.base && parts.index)
16523 split_cost += 1;
16524
16525 /* Have to use shift and adds if scale is 2 or greater. */
16526 if (parts.scale > 1)
16527 {
16528 if (regno0 != regno1)
16529 split_cost += 1;
16530 else if (regno2 == regno0)
16531 split_cost += 4;
16532 else
16533 split_cost += parts.scale;
16534 }
16535
16536 /* Have to use an add instruction with an immediate if
16537 disp is nonzero. */
16538 if (parts.disp && parts.disp != const0_rtx)
16539 split_cost += 1;
16540
16541 /* Subtract the price of lea. */
16542 split_cost -= 1;
16543 }
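/* Illustrative example (editor's note, not from the original sources):
   for "lea disp(%base,%index,2), %dest" with DEST distinct from both
   BASE and INDEX and a nonzero DISP, the counts above give
   1 (mov) + 1 (base+index add) + 1 (scale) + 1 (disp) - 1 (lea) = 3
   extra cycles for the split sequence.  */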
16544
16545 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
16546 }
16547
16548 /* Emit the x86 binary operation CODE in mode MODE, where the first operand
16549 matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
16550
16551 static void
16552 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
16553 rtx dst, rtx src)
16554 {
16555 rtx op, clob;
16556
16557 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
16558 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16559
16560 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16561 }
16562
16563 /* Split a lea instruction into a sequence of instructions
16564 that are executed on the ALU to avoid AGU stalls.
16565 It is assumed that it is allowed to clobber the flags register
16566 at the lea position. */
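/* Illustrative example (editor's note): for a lea such as
   "lea disp(%base,%index,4), %dest" with DEST distinct from BASE and
   INDEX, the code below conceptually emits
   mov %index, %dest; shl $2, %dest; add %base, %dest; add $disp, %dest
   all of which execute on the ALU.  */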
16567
16568 extern void
16569 ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
16570 {
16571 unsigned int regno0 = true_regnum (operands[0]);
16572 unsigned int regno1 = INVALID_REGNUM;
16573 unsigned int regno2 = INVALID_REGNUM;
16574 struct ix86_address parts;
16575 rtx tmp;
16576 int ok, adds;
16577
16578 ok = ix86_decompose_address (operands[1], &parts);
16579 gcc_assert (ok);
16580
16581 if (parts.base)
16582 {
16583 if (GET_MODE (parts.base) != mode)
16584 parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
16585 regno1 = true_regnum (parts.base);
16586 }
16587
16588 if (parts.index)
16589 {
16590 if (GET_MODE (parts.index) != mode)
16591 parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
16592 regno2 = true_regnum (parts.index);
16593 }
16594
16595 if (parts.scale > 1)
16596 {
16597 /* Case r1 = r1 + ... */
16598 if (regno1 == regno0)
16599 {
16600 /* If we have the case r1 = r1 + C * r1 then we
16601 would have to use multiplication, which is very
16602 expensive. Assume the cost model is wrong if we
16603 get such a case here. */
16604 gcc_assert (regno2 != regno0);
16605
16606 for (adds = parts.scale; adds > 0; adds--)
16607 ix86_emit_binop (PLUS, mode, operands[0], parts.index);
16608 }
16609 else
16610 {
16611 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
16612 if (regno0 != regno2)
16613 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16614
16615 /* Use shift for scaling. */
16616 ix86_emit_binop (ASHIFT, mode, operands[0],
16617 GEN_INT (exact_log2 (parts.scale)));
16618
16619 if (parts.base)
16620 ix86_emit_binop (PLUS, mode, operands[0], parts.base);
16621
16622 if (parts.disp && parts.disp != const0_rtx)
16623 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
16624 }
16625 }
16626 else if (!parts.base && !parts.index)
16627 {
16628 gcc_assert (parts.disp);
16629 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
16630 }
16631 else
16632 {
16633 if (!parts.base)
16634 {
16635 if (regno0 != regno2)
16636 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16637 }
16638 else if (!parts.index)
16639 {
16640 if (regno0 != regno1)
16641 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
16642 }
16643 else
16644 {
16645 if (regno0 == regno1)
16646 tmp = parts.index;
16647 else if (regno0 == regno2)
16648 tmp = parts.base;
16649 else
16650 {
16651 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
16652 tmp = parts.index;
16653 }
16654
16655 ix86_emit_binop (PLUS, mode, operands[0], tmp);
16656 }
16657
16658 if (parts.disp && parts.disp != const0_rtx)
16659 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
16660 }
16661 }
16662
16663 /* Return true if it is ok to optimize an ADD operation to a LEA
16664 operation to avoid flag register consumption. For most processors,
16665 ADD is faster than LEA. For processors like ATOM, if the
16666 destination register of the LEA holds an actual address which will be
16667 used soon, LEA is better; otherwise ADD is better. */
16668
16669 bool
16670 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16671 {
16672 unsigned int regno0 = true_regnum (operands[0]);
16673 unsigned int regno1 = true_regnum (operands[1]);
16674 unsigned int regno2 = true_regnum (operands[2]);
16675
16676 /* If a = b + c and (a != b && a != c), we must use the lea form. */
16677 if (regno0 != regno1 && regno0 != regno2)
16678 return true;
16679
16680 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16681 return false;
16682
16683 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
16684 }
16685
16686 /* Return true if destination reg of SET_BODY is shift count of
16687 USE_BODY. */
16688
16689 static bool
16690 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16691 {
16692 rtx set_dest;
16693 rtx shift_rtx;
16694 int i;
16695
16696 /* Retrieve destination of SET_BODY. */
16697 switch (GET_CODE (set_body))
16698 {
16699 case SET:
16700 set_dest = SET_DEST (set_body);
16701 if (!set_dest || !REG_P (set_dest))
16702 return false;
16703 break;
16704 case PARALLEL:
16705 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16706 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16707 use_body))
16708 return true;
16709 default:
16710 return false;
16711 break;
16712 }
16713
16714 /* Retrieve shift count of USE_BODY. */
16715 switch (GET_CODE (use_body))
16716 {
16717 case SET:
16718 shift_rtx = XEXP (use_body, 1);
16719 break;
16720 case PARALLEL:
16721 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16722 if (ix86_dep_by_shift_count_body (set_body,
16723 XVECEXP (use_body, 0, i)))
16724 return true;
16725 default:
16726 return false;
16727 break;
16728 }
16729
16730 if (shift_rtx
16731 && (GET_CODE (shift_rtx) == ASHIFT
16732 || GET_CODE (shift_rtx) == LSHIFTRT
16733 || GET_CODE (shift_rtx) == ASHIFTRT
16734 || GET_CODE (shift_rtx) == ROTATE
16735 || GET_CODE (shift_rtx) == ROTATERT))
16736 {
16737 rtx shift_count = XEXP (shift_rtx, 1);
16738
16739 /* Return true if shift count is dest of SET_BODY. */
16740 if (REG_P (shift_count)
16741 && true_regnum (set_dest) == true_regnum (shift_count))
16742 return true;
16743 }
16744
16745 return false;
16746 }
16747
16748 /* Return true if destination reg of SET_INSN is shift count of
16749 USE_INSN. */
16750
16751 bool
16752 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16753 {
16754 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16755 PATTERN (use_insn));
16756 }
16757
16758 /* Return TRUE or FALSE depending on whether the unary operator meets the
16759 appropriate constraints. */
16760
16761 bool
16762 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16763 enum machine_mode mode ATTRIBUTE_UNUSED,
16764 rtx operands[2] ATTRIBUTE_UNUSED)
16765 {
16766 /* If one of operands is memory, source and destination must match. */
16767 if ((MEM_P (operands[0])
16768 || MEM_P (operands[1]))
16769 && ! rtx_equal_p (operands[0], operands[1]))
16770 return false;
16771 return true;
16772 }
16773
16774 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16775 are ok, keeping in mind the possible movddup alternative. */
16776
16777 bool
16778 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16779 {
16780 if (MEM_P (operands[0]))
16781 return rtx_equal_p (operands[0], operands[1 + high]);
16782 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16783 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16784 return true;
16785 }
16786
16787 /* Post-reload splitter for converting an SF or DFmode value in an
16788 SSE register into an unsigned SImode. */
16789
16790 void
16791 ix86_split_convert_uns_si_sse (rtx operands[])
16792 {
16793 enum machine_mode vecmode;
16794 rtx value, large, zero_or_two31, input, two31, x;
16795
16796 large = operands[1];
16797 zero_or_two31 = operands[2];
16798 input = operands[3];
16799 two31 = operands[4];
16800 vecmode = GET_MODE (large);
16801 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16802
16803 /* Load up the value into the low element. We must ensure that the other
16804 elements are valid floats -- zero is the easiest such value. */
16805 if (MEM_P (input))
16806 {
16807 if (vecmode == V4SFmode)
16808 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16809 else
16810 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16811 }
16812 else
16813 {
16814 input = gen_rtx_REG (vecmode, REGNO (input));
16815 emit_move_insn (value, CONST0_RTX (vecmode));
16816 if (vecmode == V4SFmode)
16817 emit_insn (gen_sse_movss (value, value, input));
16818 else
16819 emit_insn (gen_sse2_movsd (value, value, input));
16820 }
16821
16822 emit_move_insn (large, two31);
16823 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16824
16825 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16826 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16827
16828 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16829 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16830
16831 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16832 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16833
16834 large = gen_rtx_REG (V4SImode, REGNO (large));
16835 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16836
16837 x = gen_rtx_REG (V4SImode, REGNO (value));
16838 if (vecmode == V4SFmode)
16839 emit_insn (gen_sse2_cvttps2dq (x, value));
16840 else
16841 emit_insn (gen_sse2_cvttpd2dq (x, value));
16842 value = x;
16843
16844 emit_insn (gen_xorv4si3 (value, value, large));
16845 }
16846
16847 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16848 Expects the 64-bit DImode to be supplied in a pair of integral
16849 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16850 -mfpmath=sse, !optimize_size only. */
16851
16852 void
16853 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16854 {
16855 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16856 rtx int_xmm, fp_xmm;
16857 rtx biases, exponents;
16858 rtx x;
16859
16860 int_xmm = gen_reg_rtx (V4SImode);
16861 if (TARGET_INTER_UNIT_MOVES)
16862 emit_insn (gen_movdi_to_sse (int_xmm, input));
16863 else if (TARGET_SSE_SPLIT_REGS)
16864 {
16865 emit_clobber (int_xmm);
16866 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16867 }
16868 else
16869 {
16870 x = gen_reg_rtx (V2DImode);
16871 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16872 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16873 }
16874
16875 x = gen_rtx_CONST_VECTOR (V4SImode,
16876 gen_rtvec (4, GEN_INT (0x43300000UL),
16877 GEN_INT (0x45300000UL),
16878 const0_rtx, const0_rtx));
16879 exponents = validize_mem (force_const_mem (V4SImode, x));
16880
16881 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16882 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16883
16884 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16885 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16886 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16887 (0x1.0p84 + double(fp_value_hi_xmm)).
16888 Note these exponents differ by 32. */
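/* Editor's illustration (values chosen for clarity, not from the original
   sources): for the 64-bit input 0x0000000500000003 the low word 3 pairs
   with 0x43300000 to form the double 0x1.0p52 + 3.0, and the high word 5
   pairs with 0x45300000 to form 0x1.0p84 + 5.0 * 0x1.0p32.  After the
   bias subtraction below this leaves 3.0 and 5.0 * 2**32, whose sum is
   the DFmode value of the unsigned input.  */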
16889
16890 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16891
16892 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16893 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16894 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16895 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16896 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16897 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16898 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16899 biases = validize_mem (force_const_mem (V2DFmode, biases));
16900 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16901
16902 /* Add the upper and lower DFmode values together. */
16903 if (TARGET_SSE3)
16904 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16905 else
16906 {
16907 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16908 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16909 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16910 }
16911
16912 ix86_expand_vector_extract (false, target, fp_xmm, 0);
16913 }
16914
16915 /* Not used, but eases macroization of patterns. */
16916 void
16917 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16918 rtx input ATTRIBUTE_UNUSED)
16919 {
16920 gcc_unreachable ();
16921 }
16922
16923 /* Convert an unsigned SImode value into a DFmode. Only currently used
16924 for SSE, but applicable anywhere. */
16925
16926 void
16927 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16928 {
16929 REAL_VALUE_TYPE TWO31r;
16930 rtx x, fp;
16931
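/* Editor's note on the trick used below: adding INT_MIN flips the sign
   bit, so the signed value of the sum is (input - 2**31); converting that
   to DFmode and adding 2**31.0 back yields the unsigned value.  */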
16932 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16933 NULL, 1, OPTAB_DIRECT);
16934
16935 fp = gen_reg_rtx (DFmode);
16936 emit_insn (gen_floatsidf2 (fp, x));
16937
16938 real_ldexp (&TWO31r, &dconst1, 31);
16939 x = const_double_from_real_value (TWO31r, DFmode);
16940
16941 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16942 if (x != target)
16943 emit_move_insn (target, x);
16944 }
16945
16946 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16947 32-bit mode; otherwise we have a direct convert instruction. */
16948
16949 void
16950 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16951 {
16952 REAL_VALUE_TYPE TWO32r;
16953 rtx fp_lo, fp_hi, x;
16954
16955 fp_lo = gen_reg_rtx (DFmode);
16956 fp_hi = gen_reg_rtx (DFmode);
16957
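/* Editor's note: the signed 64-bit input is computed below as
   (double) highpart * 2**32 + (double) (unsigned) lowpart, with the
   unsigned low half converted via ix86_expand_convert_uns_sidf_sse.  */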
16958 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16959
16960 real_ldexp (&TWO32r, &dconst1, 32);
16961 x = const_double_from_real_value (TWO32r, DFmode);
16962 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16963
16964 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16965
16966 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16967 0, OPTAB_DIRECT);
16968 if (x != target)
16969 emit_move_insn (target, x);
16970 }
16971
16972 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16973 For x86_32, -mfpmath=sse, !optimize_size only. */
16974 void
16975 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16976 {
16977 REAL_VALUE_TYPE ONE16r;
16978 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16979
16980 real_ldexp (&ONE16r, &dconst1, 16);
16981 x = const_double_from_real_value (ONE16r, SFmode);
16982 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
16983 NULL, 0, OPTAB_DIRECT);
16984 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
16985 NULL, 0, OPTAB_DIRECT);
16986 fp_hi = gen_reg_rtx (SFmode);
16987 fp_lo = gen_reg_rtx (SFmode);
16988 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16989 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16990 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16991 0, OPTAB_DIRECT);
16992 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16993 0, OPTAB_DIRECT);
16994 if (!rtx_equal_p (target, fp_hi))
16995 emit_move_insn (target, fp_hi);
16996 }
16997
16998 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16999 then replicate the value for all elements of the vector
17000 register. */
17001
17002 rtx
17003 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
17004 {
17005 int i, n_elt;
17006 rtvec v;
17007 enum machine_mode scalar_mode;
17008
17009 switch (mode)
17010 {
17011 case V8SImode:
17012 case V4SImode:
17013 case V4DImode:
17014 case V2DImode:
17015 gcc_assert (vect);
17016 case V8SFmode:
17017 case V4SFmode:
17018 case V4DFmode:
17019 case V2DFmode:
17020 n_elt = GET_MODE_NUNITS (mode);
17021 v = rtvec_alloc (n_elt);
17022 scalar_mode = GET_MODE_INNER (mode);
17023
17024 RTVEC_ELT (v, 0) = value;
17025
17026 for (i = 1; i < n_elt; ++i)
17027 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
17028
17029 return gen_rtx_CONST_VECTOR (mode, v);
17030
17031 default:
17032 gcc_unreachable ();
17033 }
17034 }
17035
17036 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17037 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17038 for an SSE register. If VECT is true, then replicate the mask for
17039 all elements of the vector register. If INVERT is true, then create
17040 a mask excluding the sign bit. */
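/* Editor's illustration: for MODE == V4SFmode with VECT true this yields
   the bit pattern { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }
   viewed as floats; with INVERT true each element is 0x7fffffff instead,
   and with VECT false only element 0 carries the mask while the rest
   are zero.  */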
17041
17042 rtx
17043 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
17044 {
17045 enum machine_mode vec_mode, imode;
17046 HOST_WIDE_INT hi, lo;
17047 int shift = 63;
17048 rtx v;
17049 rtx mask;
17050
17051 /* Find the sign bit, sign extended to 2*HWI. */
17052 switch (mode)
17053 {
17054 case V8SImode:
17055 case V4SImode:
17056 case V8SFmode:
17057 case V4SFmode:
17058 vec_mode = mode;
17059 mode = GET_MODE_INNER (mode);
17060 imode = SImode;
17061 lo = 0x80000000, hi = lo < 0;
17062 break;
17063
17064 case V4DImode:
17065 case V2DImode:
17066 case V4DFmode:
17067 case V2DFmode:
17068 vec_mode = mode;
17069 mode = GET_MODE_INNER (mode);
17070 imode = DImode;
17071 if (HOST_BITS_PER_WIDE_INT >= 64)
17072 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
17073 else
17074 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17075 break;
17076
17077 case TImode:
17078 case TFmode:
17079 vec_mode = VOIDmode;
17080 if (HOST_BITS_PER_WIDE_INT >= 64)
17081 {
17082 imode = TImode;
17083 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
17084 }
17085 else
17086 {
17087 rtvec vec;
17088
17089 imode = DImode;
17090 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17091
17092 if (invert)
17093 {
17094 lo = ~lo, hi = ~hi;
17095 v = constm1_rtx;
17096 }
17097 else
17098 v = const0_rtx;
17099
17100 mask = immed_double_const (lo, hi, imode);
17101
17102 vec = gen_rtvec (2, v, mask);
17103 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
17104 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17105
17106 return v;
17107 }
17108 break;
17109
17110 default:
17111 gcc_unreachable ();
17112 }
17113
17114 if (invert)
17115 lo = ~lo, hi = ~hi;
17116
17117 /* Force this value into the low part of a fp vector constant. */
17118 mask = immed_double_const (lo, hi, imode);
17119 mask = gen_lowpart (mode, mask);
17120
17121 if (vec_mode == VOIDmode)
17122 return force_reg (mode, mask);
17123
17124 v = ix86_build_const_vector (vec_mode, vect, mask);
17125 return force_reg (vec_mode, v);
17126 }
17127
17128 /* Generate code for floating point ABS or NEG. */
17129
17130 void
17131 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17132 rtx operands[])
17133 {
17134 rtx mask, set, dst, src;
17135 bool use_sse = false;
17136 bool vector_mode = VECTOR_MODE_P (mode);
17137 enum machine_mode vmode = mode;
17138
17139 if (vector_mode)
17140 use_sse = true;
17141 else if (mode == TFmode)
17142 use_sse = true;
17143 else if (TARGET_SSE_MATH)
17144 {
17145 use_sse = SSE_FLOAT_MODE_P (mode);
17146 if (mode == SFmode)
17147 vmode = V4SFmode;
17148 else if (mode == DFmode)
17149 vmode = V2DFmode;
17150 }
17151
17152 /* NEG and ABS performed with SSE use bitwise mask operations.
17153 Create the appropriate mask now. */
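/* Editor's note (an assumption about the consuming insn patterns): for
   NEG the mask keeps only the sign bit and is eventually XORed in, while
   for ABS the inverted mask clears the sign bit via AND; this expander
   only records the mask through the USE below and leaves the bitwise
   operation to the absneg patterns.  */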
17154 if (use_sse)
17155 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17156 else
17157 mask = NULL_RTX;
17158
17159 dst = operands[0];
17160 src = operands[1];
17161
17162 set = gen_rtx_fmt_e (code, mode, src);
17163 set = gen_rtx_SET (VOIDmode, dst, set);
17164
17165 if (mask)
17166 {
17167 rtx use, clob;
17168 rtvec par;
17169
17170 use = gen_rtx_USE (VOIDmode, mask);
17171 if (vector_mode)
17172 par = gen_rtvec (2, set, use);
17173 else
17174 {
17175 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17176 par = gen_rtvec (3, set, use, clob);
17177 }
17178 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
17179 }
17180 else
17181 emit_insn (set);
17182 }
17183
17184 /* Expand a copysign operation. Special case operand 0 being a constant. */
17185
17186 void
17187 ix86_expand_copysign (rtx operands[])
17188 {
17189 enum machine_mode mode, vmode;
17190 rtx dest, op0, op1, mask, nmask;
17191
17192 dest = operands[0];
17193 op0 = operands[1];
17194 op1 = operands[2];
17195
17196 mode = GET_MODE (dest);
17197
17198 if (mode == SFmode)
17199 vmode = V4SFmode;
17200 else if (mode == DFmode)
17201 vmode = V2DFmode;
17202 else
17203 vmode = mode;
17204
17205 if (GET_CODE (op0) == CONST_DOUBLE)
17206 {
17207 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17208
17209 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17210 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17211
17212 if (mode == SFmode || mode == DFmode)
17213 {
17214 if (op0 == CONST0_RTX (mode))
17215 op0 = CONST0_RTX (vmode);
17216 else
17217 {
17218 rtx v = ix86_build_const_vector (vmode, false, op0);
17219
17220 op0 = force_reg (vmode, v);
17221 }
17222 }
17223 else if (op0 != CONST0_RTX (mode))
17224 op0 = force_reg (mode, op0);
17225
17226 mask = ix86_build_signbit_mask (vmode, 0, 0);
17227
17228 if (mode == SFmode)
17229 copysign_insn = gen_copysignsf3_const;
17230 else if (mode == DFmode)
17231 copysign_insn = gen_copysigndf3_const;
17232 else
17233 copysign_insn = gen_copysigntf3_const;
17234
17235 emit_insn (copysign_insn (dest, op0, op1, mask));
17236 }
17237 else
17238 {
17239 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17240
17241 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17242 mask = ix86_build_signbit_mask (vmode, 0, 0);
17243
17244 if (mode == SFmode)
17245 copysign_insn = gen_copysignsf3_var;
17246 else if (mode == DFmode)
17247 copysign_insn = gen_copysigndf3_var;
17248 else
17249 copysign_insn = gen_copysigntf3_var;
17250
17251 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17252 }
17253 }
17254
17255 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17256 be a constant, and so has already been expanded into a vector constant. */
17257
17258 void
17259 ix86_split_copysign_const (rtx operands[])
17260 {
17261 enum machine_mode mode, vmode;
17262 rtx dest, op0, mask, x;
17263
17264 dest = operands[0];
17265 op0 = operands[1];
17266 mask = operands[3];
17267
17268 mode = GET_MODE (dest);
17269 vmode = GET_MODE (mask);
17270
17271 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17272 x = gen_rtx_AND (vmode, dest, mask);
17273 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17274
17275 if (op0 != CONST0_RTX (vmode))
17276 {
17277 x = gen_rtx_IOR (vmode, dest, op0);
17278 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17279 }
17280 }
17281
17282 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17283 so we have to do two masks. */
17284
17285 void
17286 ix86_split_copysign_var (rtx operands[])
17287 {
17288 enum machine_mode mode, vmode;
17289 rtx dest, scratch, op0, op1, mask, nmask, x;
17290
17291 dest = operands[0];
17292 scratch = operands[1];
17293 op0 = operands[2];
17294 op1 = operands[3];
17295 nmask = operands[4];
17296 mask = operands[5];
17297
17298 mode = GET_MODE (dest);
17299 vmode = GET_MODE (mask);
17300
17301 if (rtx_equal_p (op0, op1))
17302 {
17303 /* Shouldn't happen often (it's useless, obviously), but when it does
17304 we'd generate incorrect code if we continue below. */
17305 emit_move_insn (dest, op0);
17306 return;
17307 }
17308
17309 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17310 {
17311 gcc_assert (REGNO (op1) == REGNO (scratch));
17312
17313 x = gen_rtx_AND (vmode, scratch, mask);
17314 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17315
17316 dest = mask;
17317 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17318 x = gen_rtx_NOT (vmode, dest);
17319 x = gen_rtx_AND (vmode, x, op0);
17320 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17321 }
17322 else
17323 {
17324 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17325 {
17326 x = gen_rtx_AND (vmode, scratch, mask);
17327 }
17328 else /* alternative 2,4 */
17329 {
17330 gcc_assert (REGNO (mask) == REGNO (scratch));
17331 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17332 x = gen_rtx_AND (vmode, scratch, op1);
17333 }
17334 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17335
17336 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17337 {
17338 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17339 x = gen_rtx_AND (vmode, dest, nmask);
17340 }
17341 else /* alternative 3,4 */
17342 {
17343 gcc_assert (REGNO (nmask) == REGNO (dest));
17344 dest = nmask;
17345 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17346 x = gen_rtx_AND (vmode, dest, op0);
17347 }
17348 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17349 }
17350
17351 x = gen_rtx_IOR (vmode, dest, scratch);
17352 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17353 }
17354
17355 /* Return TRUE or FALSE depending on whether the first SET in INSN
17356 has source and destination with matching CC modes, and that the
17357 CC mode is at least as constrained as REQ_MODE. */
17358
17359 bool
17360 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17361 {
17362 rtx set;
17363 enum machine_mode set_mode;
17364
17365 set = PATTERN (insn);
17366 if (GET_CODE (set) == PARALLEL)
17367 set = XVECEXP (set, 0, 0);
17368 gcc_assert (GET_CODE (set) == SET);
17369 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17370
17371 set_mode = GET_MODE (SET_DEST (set));
17372 switch (set_mode)
17373 {
17374 case CCNOmode:
17375 if (req_mode != CCNOmode
17376 && (req_mode != CCmode
17377 || XEXP (SET_SRC (set), 1) != const0_rtx))
17378 return false;
17379 break;
17380 case CCmode:
17381 if (req_mode == CCGCmode)
17382 return false;
17383 /* FALLTHRU */
17384 case CCGCmode:
17385 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17386 return false;
17387 /* FALLTHRU */
17388 case CCGOCmode:
17389 if (req_mode == CCZmode)
17390 return false;
17391 /* FALLTHRU */
17392 case CCZmode:
17393 break;
17394
17395 case CCAmode:
17396 case CCCmode:
17397 case CCOmode:
17398 case CCSmode:
17399 if (set_mode != req_mode)
17400 return false;
17401 break;
17402
17403 default:
17404 gcc_unreachable ();
17405 }
17406
17407 return GET_MODE (SET_SRC (set)) == set_mode;
17408 }
17409
17410 /* Generate insn patterns to do an integer compare of OPERANDS. */
17411
17412 static rtx
17413 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17414 {
17415 enum machine_mode cmpmode;
17416 rtx tmp, flags;
17417
17418 cmpmode = SELECT_CC_MODE (code, op0, op1);
17419 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17420
17421 /* This is very simple, but making the interface the same as in the
17422 FP case makes the rest of the code easier. */
17423 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17424 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17425
17426 /* Return the test that should be put into the flags user, i.e.
17427 the bcc, scc, or cmov instruction. */
17428 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17429 }
17430
17431 /* Figure out whether to use ordered or unordered fp comparisons.
17432 Return the appropriate mode to use. */
17433
17434 enum machine_mode
17435 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17436 {
17437 /* ??? In order to make all comparisons reversible, we do all comparisons
17438 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17439 all forms of trapping and nontrapping comparisons, we can make inequality
17440 comparisons trapping again, since it results in better code when using
17441 FCOM based compares. */
17442 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17443 }
17444
17445 enum machine_mode
17446 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17447 {
17448 enum machine_mode mode = GET_MODE (op0);
17449
17450 if (SCALAR_FLOAT_MODE_P (mode))
17451 {
17452 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17453 return ix86_fp_compare_mode (code);
17454 }
17455
17456 switch (code)
17457 {
17458 /* Only zero flag is needed. */
17459 case EQ: /* ZF=0 */
17460 case NE: /* ZF!=0 */
17461 return CCZmode;
17462 /* Codes needing carry flag. */
17463 case GEU: /* CF=0 */
17464 case LTU: /* CF=1 */
17465 /* Detect overflow checks. They need just the carry flag. */
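/* Editor's example: a comparison such as (a + b) LTU a, as generated
   for an overflow check like "if (a + b < a)", matches this shape and
   needs only the carry flag.  */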
17466 if (GET_CODE (op0) == PLUS
17467 && rtx_equal_p (op1, XEXP (op0, 0)))
17468 return CCCmode;
17469 else
17470 return CCmode;
17471 case GTU: /* CF=0 & ZF=0 */
17472 case LEU: /* CF=1 | ZF=1 */
17473 /* Detect overflow checks. They need just the carry flag. */
17474 if (GET_CODE (op0) == MINUS
17475 && rtx_equal_p (op1, XEXP (op0, 0)))
17476 return CCCmode;
17477 else
17478 return CCmode;
17479 /* Codes possibly doable only with sign flag when
17480 comparing against zero. */
17481 case GE: /* SF=OF or SF=0 */
17482 case LT: /* SF<>OF or SF=1 */
17483 if (op1 == const0_rtx)
17484 return CCGOCmode;
17485 else
17486 /* For other cases Carry flag is not required. */
17487 return CCGCmode;
17488 /* Codes doable only with sign flag when comparing
17489 against zero, but we miss jump instruction for it
17490 so we need to use relational tests against overflow
17491 that thus needs to be zero. */
17492 case GT: /* ZF=0 & SF=OF */
17493 case LE: /* ZF=1 | SF<>OF */
17494 if (op1 == const0_rtx)
17495 return CCNOmode;
17496 else
17497 return CCGCmode;
17498 /* The strcmp pattern does (use flags), and combine may ask us for the
17499 proper mode. */
17500 case USE:
17501 return CCmode;
17502 default:
17503 gcc_unreachable ();
17504 }
17505 }
17506
17507 /* Return the fixed registers used for condition codes. */
17508
17509 static bool
17510 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17511 {
17512 *p1 = FLAGS_REG;
17513 *p2 = FPSR_REG;
17514 return true;
17515 }
17516
17517 /* If two condition code modes are compatible, return a condition code
17518 mode which is compatible with both. Otherwise, return
17519 VOIDmode. */
17520
17521 static enum machine_mode
17522 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17523 {
17524 if (m1 == m2)
17525 return m1;
17526
17527 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17528 return VOIDmode;
17529
17530 if ((m1 == CCGCmode && m2 == CCGOCmode)
17531 || (m1 == CCGOCmode && m2 == CCGCmode))
17532 return CCGCmode;
17533
17534 switch (m1)
17535 {
17536 default:
17537 gcc_unreachable ();
17538
17539 case CCmode:
17540 case CCGCmode:
17541 case CCGOCmode:
17542 case CCNOmode:
17543 case CCAmode:
17544 case CCCmode:
17545 case CCOmode:
17546 case CCSmode:
17547 case CCZmode:
17548 switch (m2)
17549 {
17550 default:
17551 return VOIDmode;
17552
17553 case CCmode:
17554 case CCGCmode:
17555 case CCGOCmode:
17556 case CCNOmode:
17557 case CCAmode:
17558 case CCCmode:
17559 case CCOmode:
17560 case CCSmode:
17561 case CCZmode:
17562 return CCmode;
17563 }
17564
17565 case CCFPmode:
17566 case CCFPUmode:
17567 /* These are only compatible with themselves, which we already
17568 checked above. */
17569 return VOIDmode;
17570 }
17571 }
17572
17573
17574 /* Return a comparison we can do that is equivalent to
17575 swap_condition (code), apart possibly from orderedness.
17576 But never change orderedness if TARGET_IEEE_FP, returning
17577 UNKNOWN in that case if necessary. */
17578
17579 static enum rtx_code
17580 ix86_fp_swap_condition (enum rtx_code code)
17581 {
17582 switch (code)
17583 {
17584 case GT: /* GTU - CF=0 & ZF=0 */
17585 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17586 case GE: /* GEU - CF=0 */
17587 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17588 case UNLT: /* LTU - CF=1 */
17589 return TARGET_IEEE_FP ? UNKNOWN : GT;
17590 case UNLE: /* LEU - CF=1 | ZF=1 */
17591 return TARGET_IEEE_FP ? UNKNOWN : GE;
17592 default:
17593 return swap_condition (code);
17594 }
17595 }
17596
17597 /* Return the cost of comparison CODE using the best strategy for performance.
17598 All following functions use the number of instructions as a cost metric.
17599 In the future this should be tweaked to compute bytes for optimize_size and
17600 take into account the performance of various instructions on various CPUs. */
17601
17602 static int
17603 ix86_fp_comparison_cost (enum rtx_code code)
17604 {
17605 int arith_cost;
17606
17607 /* The cost of code using bit-twiddling on %ah. */
17608 switch (code)
17609 {
17610 case UNLE:
17611 case UNLT:
17612 case LTGT:
17613 case GT:
17614 case GE:
17615 case UNORDERED:
17616 case ORDERED:
17617 case UNEQ:
17618 arith_cost = 4;
17619 break;
17620 case LT:
17621 case NE:
17622 case EQ:
17623 case UNGE:
17624 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17625 break;
17626 case LE:
17627 case UNGT:
17628 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17629 break;
17630 default:
17631 gcc_unreachable ();
17632 }
17633
17634 switch (ix86_fp_comparison_strategy (code))
17635 {
17636 case IX86_FPCMP_COMI:
17637 return arith_cost > 4 ? 3 : 2;
17638 case IX86_FPCMP_SAHF:
17639 return arith_cost > 4 ? 4 : 3;
17640 default:
17641 return arith_cost;
17642 }
17643 }
17644
17645 /* Return the strategy to use for floating-point comparisons. We assume that
17646 fcomi is always preferable where available, since that is also true when looking
17647 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17648
17649 enum ix86_fpcmp_strategy
17650 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17651 {
17652 /* Do fcomi/sahf based test when profitable. */
17653
17654 if (TARGET_CMOVE)
17655 return IX86_FPCMP_COMI;
17656
17657 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17658 return IX86_FPCMP_SAHF;
17659
17660 return IX86_FPCMP_ARITH;
17661 }
17662
17663 /* Swap, force into registers, or otherwise massage the two operands
17664 to a fp comparison. The operands are updated in place; the new
17665 comparison code is returned. */
17666
17667 static enum rtx_code
17668 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17669 {
17670 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17671 rtx op0 = *pop0, op1 = *pop1;
17672 enum machine_mode op_mode = GET_MODE (op0);
17673 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17674
17675 /* All of the unordered compare instructions only work on registers.
17676 The same is true of the fcomi compare instructions. The XFmode
17677 compare instructions require registers except when comparing
17678 against zero or when converting operand 1 from fixed point to
17679 floating point. */
17680
17681 if (!is_sse
17682 && (fpcmp_mode == CCFPUmode
17683 || (op_mode == XFmode
17684 && ! (standard_80387_constant_p (op0) == 1
17685 || standard_80387_constant_p (op1) == 1)
17686 && GET_CODE (op1) != FLOAT)
17687 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17688 {
17689 op0 = force_reg (op_mode, op0);
17690 op1 = force_reg (op_mode, op1);
17691 }
17692 else
17693 {
17694 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17695 things around if they appear profitable, otherwise force op0
17696 into a register. */
17697
17698 if (standard_80387_constant_p (op0) == 0
17699 || (MEM_P (op0)
17700 && ! (standard_80387_constant_p (op1) == 0
17701 || MEM_P (op1))))
17702 {
17703 enum rtx_code new_code = ix86_fp_swap_condition (code);
17704 if (new_code != UNKNOWN)
17705 {
17706 rtx tmp;
17707 tmp = op0, op0 = op1, op1 = tmp;
17708 code = new_code;
17709 }
17710 }
17711
17712 if (!REG_P (op0))
17713 op0 = force_reg (op_mode, op0);
17714
17715 if (CONSTANT_P (op1))
17716 {
17717 int tmp = standard_80387_constant_p (op1);
17718 if (tmp == 0)
17719 op1 = validize_mem (force_const_mem (op_mode, op1));
17720 else if (tmp == 1)
17721 {
17722 if (TARGET_CMOVE)
17723 op1 = force_reg (op_mode, op1);
17724 }
17725 else
17726 op1 = force_reg (op_mode, op1);
17727 }
17728 }
17729
17730 /* Try to rearrange the comparison to make it cheaper. */
17731 if (ix86_fp_comparison_cost (code)
17732 > ix86_fp_comparison_cost (swap_condition (code))
17733 && (REG_P (op1) || can_create_pseudo_p ()))
17734 {
17735 rtx tmp;
17736 tmp = op0, op0 = op1, op1 = tmp;
17737 code = swap_condition (code);
17738 if (!REG_P (op0))
17739 op0 = force_reg (op_mode, op0);
17740 }
17741
17742 *pop0 = op0;
17743 *pop1 = op1;
17744 return code;
17745 }
17746
17747 /* Convert comparison codes we use to represent FP comparison to integer
17748 code that will result in proper branch. Return UNKNOWN if no such code
17749 is available. */
17750
17751 enum rtx_code
17752 ix86_fp_compare_code_to_integer (enum rtx_code code)
17753 {
17754 switch (code)
17755 {
17756 case GT:
17757 return GTU;
17758 case GE:
17759 return GEU;
17760 case ORDERED:
17761 case UNORDERED:
17762 return code;
17763 break;
17764 case UNEQ:
17765 return EQ;
17766 break;
17767 case UNLT:
17768 return LTU;
17769 break;
17770 case UNLE:
17771 return LEU;
17772 break;
17773 case LTGT:
17774 return NE;
17775 break;
17776 default:
17777 return UNKNOWN;
17778 }
17779 }
17780
17781 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17782
17783 static rtx
17784 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17785 {
17786 enum machine_mode fpcmp_mode, intcmp_mode;
17787 rtx tmp, tmp2;
17788
17789 fpcmp_mode = ix86_fp_compare_mode (code);
17790 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17791
17792 /* Do fcomi/sahf based test when profitable. */
17793 switch (ix86_fp_comparison_strategy (code))
17794 {
17795 case IX86_FPCMP_COMI:
17796 intcmp_mode = fpcmp_mode;
17797 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17798 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17799 tmp);
17800 emit_insn (tmp);
17801 break;
17802
17803 case IX86_FPCMP_SAHF:
17804 intcmp_mode = fpcmp_mode;
17805 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17806 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17807 tmp);
17808
17809 if (!scratch)
17810 scratch = gen_reg_rtx (HImode);
17811 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17812 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17813 break;
17814
17815 case IX86_FPCMP_ARITH:
17816 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17817 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17818 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17819 if (!scratch)
17820 scratch = gen_reg_rtx (HImode);
17821 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17822
17823 /* In the unordered case, we have to check C2 for NaN's, which
17824 doesn't happen to work out to anything nice combination-wise.
17825 So do some bit twiddling on the value we've got in AH to come
17826 up with an appropriate set of condition codes. */
17827
17828 intcmp_mode = CCNOmode;
17829 switch (code)
17830 {
17831 case GT:
17832 case UNGT:
17833 if (code == GT || !TARGET_IEEE_FP)
17834 {
17835 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17836 code = EQ;
17837 }
17838 else
17839 {
17840 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17841 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17842 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17843 intcmp_mode = CCmode;
17844 code = GEU;
17845 }
17846 break;
17847 case LT:
17848 case UNLT:
17849 if (code == LT && TARGET_IEEE_FP)
17850 {
17851 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17852 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17853 intcmp_mode = CCmode;
17854 code = EQ;
17855 }
17856 else
17857 {
17858 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17859 code = NE;
17860 }
17861 break;
17862 case GE:
17863 case UNGE:
17864 if (code == GE || !TARGET_IEEE_FP)
17865 {
17866 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17867 code = EQ;
17868 }
17869 else
17870 {
17871 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17872 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17873 code = NE;
17874 }
17875 break;
17876 case LE:
17877 case UNLE:
17878 if (code == LE && TARGET_IEEE_FP)
17879 {
17880 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17881 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17882 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17883 intcmp_mode = CCmode;
17884 code = LTU;
17885 }
17886 else
17887 {
17888 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17889 code = NE;
17890 }
17891 break;
17892 case EQ:
17893 case UNEQ:
17894 if (code == EQ && TARGET_IEEE_FP)
17895 {
17896 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17897 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17898 intcmp_mode = CCmode;
17899 code = EQ;
17900 }
17901 else
17902 {
17903 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17904 code = NE;
17905 }
17906 break;
17907 case NE:
17908 case LTGT:
17909 if (code == NE && TARGET_IEEE_FP)
17910 {
17911 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17912 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17913 GEN_INT (0x40)));
17914 code = NE;
17915 }
17916 else
17917 {
17918 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17919 code = EQ;
17920 }
17921 break;
17922
17923 case UNORDERED:
17924 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17925 code = NE;
17926 break;
17927 case ORDERED:
17928 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17929 code = EQ;
17930 break;
17931
17932 default:
17933 gcc_unreachable ();
17934 }
17935 break;
17936
17937 default:
17938 gcc_unreachable ();
17939 }
17940
17941 /* Return the test that should be put into the flags user, i.e.
17942 the bcc, scc, or cmov instruction. */
17943 return gen_rtx_fmt_ee (code, VOIDmode,
17944 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17945 const0_rtx);
17946 }
17947
17948 static rtx
17949 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17950 {
17951 rtx ret;
17952
17953 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17954 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17955
17956 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17957 {
17958 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17959 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17960 }
17961 else
17962 ret = ix86_expand_int_compare (code, op0, op1);
17963
17964 return ret;
17965 }
17966
17967 void
17968 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17969 {
17970 enum machine_mode mode = GET_MODE (op0);
17971 rtx tmp;
17972
17973 switch (mode)
17974 {
17975 case SFmode:
17976 case DFmode:
17977 case XFmode:
17978 case QImode:
17979 case HImode:
17980 case SImode:
17981 simple:
17982 tmp = ix86_expand_compare (code, op0, op1);
17983 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17984 gen_rtx_LABEL_REF (VOIDmode, label),
17985 pc_rtx);
17986 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17987 return;
17988
17989 case DImode:
17990 if (TARGET_64BIT)
17991 goto simple;
17992 case TImode:
17993 /* Expand DImode branch into multiple compare+branch. */
17994 {
17995 rtx lo[2], hi[2], label2;
17996 enum rtx_code code1, code2, code3;
17997 enum machine_mode submode;
17998
17999 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
18000 {
18001 tmp = op0, op0 = op1, op1 = tmp;
18002 code = swap_condition (code);
18003 }
18004
18005 split_double_mode (mode, &op0, 1, lo+0, hi+0);
18006 split_double_mode (mode, &op1, 1, lo+1, hi+1);
18007
18008 submode = mode == DImode ? SImode : DImode;
18009
18010 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18011 avoid two branches. This costs one extra insn, so disable when
18012 optimizing for size. */
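/* Editor's example: for a DImode "a == b" on 32-bit targets this emits
   t = (hi(a) ^ hi(b)) | (lo(a) ^ lo(b)) and branches on t == 0,
   replacing two compare-and-branch pairs with one.  */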
18013
18014 if ((code == EQ || code == NE)
18015 && (!optimize_insn_for_size_p ()
18016 || hi[1] == const0_rtx || lo[1] == const0_rtx))
18017 {
18018 rtx xor0, xor1;
18019
18020 xor1 = hi[0];
18021 if (hi[1] != const0_rtx)
18022 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
18023 NULL_RTX, 0, OPTAB_WIDEN);
18024
18025 xor0 = lo[0];
18026 if (lo[1] != const0_rtx)
18027 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
18028 NULL_RTX, 0, OPTAB_WIDEN);
18029
18030 tmp = expand_binop (submode, ior_optab, xor1, xor0,
18031 NULL_RTX, 0, OPTAB_WIDEN);
18032
18033 ix86_expand_branch (code, tmp, const0_rtx, label);
18034 return;
18035 }
18036
18037 /* Otherwise, if we are doing a less-than or greater-or-equal-than
18038 comparison, op1 is a constant and the low word is zero, then we
18039 can just examine the high word. Similarly for a low word of -1 and
18040 less-or-equal-than or greater-than. */
18041
18042 if (CONST_INT_P (hi[1]))
18043 switch (code)
18044 {
18045 case LT: case LTU: case GE: case GEU:
18046 if (lo[1] == const0_rtx)
18047 {
18048 ix86_expand_branch (code, hi[0], hi[1], label);
18049 return;
18050 }
18051 break;
18052 case LE: case LEU: case GT: case GTU:
18053 if (lo[1] == constm1_rtx)
18054 {
18055 ix86_expand_branch (code, hi[0], hi[1], label);
18056 return;
18057 }
18058 break;
18059 default:
18060 break;
18061 }
18062
18063 /* Otherwise, we need two or three jumps. */
18064
18065 label2 = gen_label_rtx ();
18066
18067 code1 = code;
18068 code2 = swap_condition (code);
18069 code3 = unsigned_condition (code);
18070
18071 switch (code)
18072 {
18073 case LT: case GT: case LTU: case GTU:
18074 break;
18075
18076 case LE: code1 = LT; code2 = GT; break;
18077 case GE: code1 = GT; code2 = LT; break;
18078 case LEU: code1 = LTU; code2 = GTU; break;
18079 case GEU: code1 = GTU; code2 = LTU; break;
18080
18081 case EQ: code1 = UNKNOWN; code2 = NE; break;
18082 case NE: code2 = UNKNOWN; break;
18083
18084 default:
18085 gcc_unreachable ();
18086 }
18087
18088 /*
18089 * a < b =>
18090 * if (hi(a) < hi(b)) goto true;
18091 * if (hi(a) > hi(b)) goto false;
18092 * if (lo(a) < lo(b)) goto true;
18093 * false:
18094 */
18095
18096 if (code1 != UNKNOWN)
18097 ix86_expand_branch (code1, hi[0], hi[1], label);
18098 if (code2 != UNKNOWN)
18099 ix86_expand_branch (code2, hi[0], hi[1], label2);
18100
18101 ix86_expand_branch (code3, lo[0], lo[1], label);
18102
18103 if (code2 != UNKNOWN)
18104 emit_label (label2);
18105 return;
18106 }
18107
18108 default:
18109 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
18110 goto simple;
18111 }
18112 }
18113
18114 /* Split branch based on floating point condition. */
18115 void
18116 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18117 rtx target1, rtx target2, rtx tmp, rtx pushed)
18118 {
18119 rtx condition;
18120 rtx i;
18121
18122 if (target2 != pc_rtx)
18123 {
18124 rtx tmp = target2;
18125 code = reverse_condition_maybe_unordered (code);
18126 target2 = target1;
18127 target1 = tmp;
18128 }
18129
18130 condition = ix86_expand_fp_compare (code, op1, op2,
18131 tmp);
18132
18133 /* Remove pushed operand from stack. */
18134 if (pushed)
18135 ix86_free_from_memory (GET_MODE (pushed));
18136
18137 i = emit_jump_insn (gen_rtx_SET
18138 (VOIDmode, pc_rtx,
18139 gen_rtx_IF_THEN_ELSE (VOIDmode,
18140 condition, target1, target2)));
18141 if (split_branch_probability >= 0)
18142 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18143 }
18144
18145 void
18146 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18147 {
18148 rtx ret;
18149
18150 gcc_assert (GET_MODE (dest) == QImode);
18151
18152 ret = ix86_expand_compare (code, op0, op1);
18153 PUT_MODE (ret, QImode);
18154 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18155 }
18156
18157 /* Expand comparison setting or clearing carry flag. Return true when
18158 successful and set pop for the operation. */
18159 static bool
18160 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18161 {
18162 enum machine_mode mode =
18163 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18164
18165 /* Do not handle double-mode compares that go through a special path. */
18166 if (mode == (TARGET_64BIT ? TImode : DImode))
18167 return false;
18168
18169 if (SCALAR_FLOAT_MODE_P (mode))
18170 {
18171 rtx compare_op, compare_seq;
18172
18173 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18174
18175 /* Shortcut: the following common codes never translate
18176 into carry flag compares. */
18177 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18178 || code == ORDERED || code == UNORDERED)
18179 return false;
18180
18181 /* These comparisons require the zero flag; swap the operands so that they no longer do. */
18182 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18183 && !TARGET_IEEE_FP)
18184 {
18185 rtx tmp = op0;
18186 op0 = op1;
18187 op1 = tmp;
18188 code = swap_condition (code);
18189 }
18190
18191 /* Try to expand the comparison and verify that we end up with
18192 a carry flag based comparison. This fails to be true only when
18193 we decide to expand the comparison using arithmetic, which is not
18194 a very common scenario. */
18195 start_sequence ();
18196 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18197 compare_seq = get_insns ();
18198 end_sequence ();
18199
18200 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18201 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18202 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18203 else
18204 code = GET_CODE (compare_op);
18205
18206 if (code != LTU && code != GEU)
18207 return false;
18208
18209 emit_insn (compare_seq);
18210 *pop = compare_op;
18211 return true;
18212 }
18213
18214 if (!INTEGRAL_MODE_P (mode))
18215 return false;
18216
18217 switch (code)
18218 {
18219 case LTU:
18220 case GEU:
18221 break;
18222
18223 /* Convert a==0 into (unsigned)a<1. */
18224 case EQ:
18225 case NE:
18226 if (op1 != const0_rtx)
18227 return false;
18228 op1 = const1_rtx;
18229 code = (code == EQ ? LTU : GEU);
18230 break;
18231
18232 /* Convert a>b into b<a or a>=b+1.  */
18233 case GTU:
18234 case LEU:
18235 if (CONST_INT_P (op1))
18236 {
18237 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18238 /* Bail out on overflow.  We could still swap the operands, but that
18239 would force loading of the constant into a register.  */
18240 if (op1 == const0_rtx
18241 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18242 return false;
18243 code = (code == GTU ? GEU : LTU);
18244 }
18245 else
18246 {
18247 rtx tmp = op1;
18248 op1 = op0;
18249 op0 = tmp;
18250 code = (code == GTU ? LTU : GEU);
18251 }
18252 break;
18253
18254 /* Convert a>=0 into (unsigned)a<0x80000000. */
18255 case LT:
18256 case GE:
18257 if (mode == DImode || op1 != const0_rtx)
18258 return false;
18259 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18260 code = (code == LT ? GEU : LTU);
18261 break;
18262 case LE:
18263 case GT:
18264 if (mode == DImode || op1 != constm1_rtx)
18265 return false;
18266 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18267 code = (code == LE ? GEU : LTU);
18268 break;
18269
18270 default:
18271 return false;
18272 }
18273 /* Swapping operands may cause the constant to appear as the first operand.  */
18274 if (!nonimmediate_operand (op0, VOIDmode))
18275 {
18276 if (!can_create_pseudo_p ())
18277 return false;
18278 op0 = force_reg (mode, op0);
18279 }
18280 *pop = ix86_expand_compare (code, op0, op1);
18281 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
18282 return true;
18283 }
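
/* Illustrative sketch (not emitted verbatim by this function; register
   names are examples only): after the EQ -> LTU conversion above, a test
   such as (a == 0) is expressed as an unsigned compare against 1, whose
   carry flag output can then feed sbb/adc in the callers:

	cmpl	$1, %eax	; CF = (a < 1) unsigned, i.e. (a == 0)
	sbbl	%edx, %edx	; edx = (a == 0) ? -1 : 0
   */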
18284
18285 bool
18286 ix86_expand_int_movcc (rtx operands[])
18287 {
18288 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18289 rtx compare_seq, compare_op;
18290 enum machine_mode mode = GET_MODE (operands[0]);
18291 bool sign_bit_compare_p = false;
18292 rtx op0 = XEXP (operands[1], 0);
18293 rtx op1 = XEXP (operands[1], 1);
18294
18295 start_sequence ();
18296 compare_op = ix86_expand_compare (code, op0, op1);
18297 compare_seq = get_insns ();
18298 end_sequence ();
18299
18300 compare_code = GET_CODE (compare_op);
18301
18302 if ((op1 == const0_rtx && (code == GE || code == LT))
18303 || (op1 == constm1_rtx && (code == GT || code == LE)))
18304 sign_bit_compare_p = true;
18305
18306 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18307 HImode insns, we'd be swallowed in word prefix ops. */
18308
18309 if ((mode != HImode || TARGET_FAST_PREFIX)
18310 && (mode != (TARGET_64BIT ? TImode : DImode))
18311 && CONST_INT_P (operands[2])
18312 && CONST_INT_P (operands[3]))
18313 {
18314 rtx out = operands[0];
18315 HOST_WIDE_INT ct = INTVAL (operands[2]);
18316 HOST_WIDE_INT cf = INTVAL (operands[3]);
18317 HOST_WIDE_INT diff;
18318
18319 diff = ct - cf;
18320 /* Sign bit compares are better done using shifts than by using
18321 sbb.  */
18322 if (sign_bit_compare_p
18323 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18324 {
18325 /* Detect overlap between destination and compare sources. */
18326 rtx tmp = out;
18327
18328 if (!sign_bit_compare_p)
18329 {
18330 rtx flags;
18331 bool fpcmp = false;
18332
18333 compare_code = GET_CODE (compare_op);
18334
18335 flags = XEXP (compare_op, 0);
18336
18337 if (GET_MODE (flags) == CCFPmode
18338 || GET_MODE (flags) == CCFPUmode)
18339 {
18340 fpcmp = true;
18341 compare_code
18342 = ix86_fp_compare_code_to_integer (compare_code);
18343 }
18344
18345 /* To simplify the rest of the code, restrict to the GEU case.  */
18346 if (compare_code == LTU)
18347 {
18348 HOST_WIDE_INT tmp = ct;
18349 ct = cf;
18350 cf = tmp;
18351 compare_code = reverse_condition (compare_code);
18352 code = reverse_condition (code);
18353 }
18354 else
18355 {
18356 if (fpcmp)
18357 PUT_CODE (compare_op,
18358 reverse_condition_maybe_unordered
18359 (GET_CODE (compare_op)));
18360 else
18361 PUT_CODE (compare_op,
18362 reverse_condition (GET_CODE (compare_op)));
18363 }
18364 diff = ct - cf;
18365
18366 if (reg_overlap_mentioned_p (out, op0)
18367 || reg_overlap_mentioned_p (out, op1))
18368 tmp = gen_reg_rtx (mode);
18369
18370 if (mode == DImode)
18371 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18372 else
18373 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18374 flags, compare_op));
18375 }
18376 else
18377 {
18378 if (code == GT || code == GE)
18379 code = reverse_condition (code);
18380 else
18381 {
18382 HOST_WIDE_INT tmp = ct;
18383 ct = cf;
18384 cf = tmp;
18385 diff = ct - cf;
18386 }
18387 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18388 }
18389
18390 if (diff == 1)
18391 {
18392 /*
18393 * cmpl op0,op1
18394 * sbbl dest,dest
18395 * [addl dest, ct]
18396 *
18397 * Size 5 - 8.
18398 */
18399 if (ct)
18400 tmp = expand_simple_binop (mode, PLUS,
18401 tmp, GEN_INT (ct),
18402 copy_rtx (tmp), 1, OPTAB_DIRECT);
18403 }
18404 else if (cf == -1)
18405 {
18406 /*
18407 * cmpl op0,op1
18408 * sbbl dest,dest
18409 * orl $ct, dest
18410 *
18411 * Size 8.
18412 */
18413 tmp = expand_simple_binop (mode, IOR,
18414 tmp, GEN_INT (ct),
18415 copy_rtx (tmp), 1, OPTAB_DIRECT);
18416 }
18417 else if (diff == -1 && ct)
18418 {
18419 /*
18420 * cmpl op0,op1
18421 * sbbl dest,dest
18422 * notl dest
18423 * [addl dest, cf]
18424 *
18425 * Size 8 - 11.
18426 */
18427 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18428 if (cf)
18429 tmp = expand_simple_binop (mode, PLUS,
18430 copy_rtx (tmp), GEN_INT (cf),
18431 copy_rtx (tmp), 1, OPTAB_DIRECT);
18432 }
18433 else
18434 {
18435 /*
18436 * cmpl op0,op1
18437 * sbbl dest,dest
18438 * [notl dest]
18439 * andl cf - ct, dest
18440 * [addl dest, ct]
18441 *
18442 * Size 8 - 11.
18443 */
18444
18445 if (cf == 0)
18446 {
18447 cf = ct;
18448 ct = 0;
18449 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18450 }
18451
18452 tmp = expand_simple_binop (mode, AND,
18453 copy_rtx (tmp),
18454 gen_int_mode (cf - ct, mode),
18455 copy_rtx (tmp), 1, OPTAB_DIRECT);
18456 if (ct)
18457 tmp = expand_simple_binop (mode, PLUS,
18458 copy_rtx (tmp), GEN_INT (ct),
18459 copy_rtx (tmp), 1, OPTAB_DIRECT);
18460 }
18461
18462 if (!rtx_equal_p (tmp, out))
18463 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
18464
18465 return true;
18466 }
18467
18468 if (diff < 0)
18469 {
18470 enum machine_mode cmp_mode = GET_MODE (op0);
18471
18472 HOST_WIDE_INT tmp;
18473 tmp = ct, ct = cf, cf = tmp;
18474 diff = -diff;
18475
18476 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18477 {
18478 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18479
18480 /* We may be reversing an unordered compare to a normal compare, which
18481 is not valid in general (we may convert a non-trapping condition
18482 to a trapping one); however, on i386 we currently emit all
18483 comparisons unordered.  */
18484 compare_code = reverse_condition_maybe_unordered (compare_code);
18485 code = reverse_condition_maybe_unordered (code);
18486 }
18487 else
18488 {
18489 compare_code = reverse_condition (compare_code);
18490 code = reverse_condition (code);
18491 }
18492 }
18493
18494 compare_code = UNKNOWN;
18495 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18496 && CONST_INT_P (op1))
18497 {
18498 if (op1 == const0_rtx
18499 && (code == LT || code == GE))
18500 compare_code = code;
18501 else if (op1 == constm1_rtx)
18502 {
18503 if (code == LE)
18504 compare_code = LT;
18505 else if (code == GT)
18506 compare_code = GE;
18507 }
18508 }
18509
18510 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18511 if (compare_code != UNKNOWN
18512 && GET_MODE (op0) == GET_MODE (out)
18513 && (cf == -1 || ct == -1))
18514 {
18515 /* If the lea code below could be used, only optimize
18516 if it results in a 2-insn sequence.  */
18517
18518 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18519 || diff == 3 || diff == 5 || diff == 9)
18520 || (compare_code == LT && ct == -1)
18521 || (compare_code == GE && cf == -1))
18522 {
18523 /*
18524 * notl op1 (if necessary)
18525 * sarl $31, op1
18526 * orl cf, op1
18527 */
18528 if (ct != -1)
18529 {
18530 cf = ct;
18531 ct = -1;
18532 code = reverse_condition (code);
18533 }
18534
18535 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18536
18537 out = expand_simple_binop (mode, IOR,
18538 out, GEN_INT (cf),
18539 out, 1, OPTAB_DIRECT);
18540 if (out != operands[0])
18541 emit_move_insn (operands[0], out);
18542
18543 return true;
18544 }
18545 }
18546
18547
18548 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18549 || diff == 3 || diff == 5 || diff == 9)
18550 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18551 && (mode != DImode
18552 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18553 {
18554 /*
18555 * xorl dest,dest
18556 * cmpl op1,op2
18557 * setcc dest
18558 * lea cf(dest*(ct-cf)),dest
18559 *
18560 * Size 14.
18561 *
18562 * This also catches the degenerate setcc-only case.
18563 */
18564
18565 rtx tmp;
18566 int nops;
18567
18568 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18569
18570 nops = 0;
18571 /* On x86_64 the lea instruction operates on Pmode, so we need
18572 to get the arithmetic done in the proper mode to match.  */
18573 if (diff == 1)
18574 tmp = copy_rtx (out);
18575 else
18576 {
18577 rtx out1;
18578 out1 = copy_rtx (out);
18579 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18580 nops++;
18581 if (diff & 1)
18582 {
18583 tmp = gen_rtx_PLUS (mode, tmp, out1);
18584 nops++;
18585 }
18586 }
18587 if (cf != 0)
18588 {
18589 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18590 nops++;
18591 }
18592 if (!rtx_equal_p (tmp, out))
18593 {
18594 if (nops == 1)
18595 out = force_operand (tmp, copy_rtx (out));
18596 else
18597 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18598 }
18599 if (!rtx_equal_p (out, operands[0]))
18600 emit_move_insn (operands[0], copy_rtx (out));
18601
18602 return true;
18603 }
18604
18605 /*
18606 * General case: Jumpful:
18607 * xorl dest,dest cmpl op1, op2
18608 * cmpl op1, op2 movl ct, dest
18609 * setcc dest jcc 1f
18610 * decl dest movl cf, dest
18611 * andl (cf-ct),dest 1:
18612 * addl ct,dest
18613 *
18614 * Size 20. Size 14.
18615 *
18616 * This is reasonably steep, but branch mispredict costs are
18617 * high on modern cpus, so consider failing only if optimizing
18618 * for space.
18619 */
18620
18621 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18622 && BRANCH_COST (optimize_insn_for_speed_p (),
18623 false) >= 2)
18624 {
18625 if (cf == 0)
18626 {
18627 enum machine_mode cmp_mode = GET_MODE (op0);
18628
18629 cf = ct;
18630 ct = 0;
18631
18632 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18633 {
18634 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18635
18636 /* We may be reversing an unordered compare to a normal compare,
18637 which is not valid in general (we may convert a non-trapping
18638 condition to a trapping one); however, on i386 we currently
18639 emit all comparisons unordered.  */
18640 code = reverse_condition_maybe_unordered (code);
18641 }
18642 else
18643 {
18644 code = reverse_condition (code);
18645 if (compare_code != UNKNOWN)
18646 compare_code = reverse_condition (compare_code);
18647 }
18648 }
18649
18650 if (compare_code != UNKNOWN)
18651 {
18652 /* notl op1 (if needed)
18653 sarl $31, op1
18654 andl (cf-ct), op1
18655 addl ct, op1
18656
18657 For x < 0 (resp. x <= -1) there will be no notl,
18658 so if possible swap the constants to get rid of the
18659 complement.
18660 True/false will be -1/0 while code below (store flag
18661 followed by decrement) is 0/-1, so the constants need
18662 to be exchanged once more. */
18663
18664 if (compare_code == GE || !cf)
18665 {
18666 code = reverse_condition (code);
18667 compare_code = LT;
18668 }
18669 else
18670 {
18671 HOST_WIDE_INT tmp = cf;
18672 cf = ct;
18673 ct = tmp;
18674 }
18675
18676 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18677 }
18678 else
18679 {
18680 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18681
18682 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18683 constm1_rtx,
18684 copy_rtx (out), 1, OPTAB_DIRECT);
18685 }
18686
18687 out = expand_simple_binop (mode, AND, copy_rtx (out),
18688 gen_int_mode (cf - ct, mode),
18689 copy_rtx (out), 1, OPTAB_DIRECT);
18690 if (ct)
18691 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18692 copy_rtx (out), 1, OPTAB_DIRECT);
18693 if (!rtx_equal_p (out, operands[0]))
18694 emit_move_insn (operands[0], copy_rtx (out));
18695
18696 return true;
18697 }
18698 }
18699
18700 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18701 {
18702 /* Try a few things more with specific constants and a variable. */
18703
18704 optab op;
18705 rtx var, orig_out, out, tmp;
18706
18707 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18708 return false;
18709
18710 /* If one of the two operands is an interesting constant, load a 0/-1
18711 constant with the code above and mask the variable in with a logical operation.  */
18712
18713 if (CONST_INT_P (operands[2]))
18714 {
18715 var = operands[3];
18716 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18717 operands[3] = constm1_rtx, op = and_optab;
18718 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18719 operands[3] = const0_rtx, op = ior_optab;
18720 else
18721 return false;
18722 }
18723 else if (CONST_INT_P (operands[3]))
18724 {
18725 var = operands[2];
18726 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18727 operands[2] = constm1_rtx, op = and_optab;
18728 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18729 operands[2] = const0_rtx, op = ior_optab;
18730 else
18731 return false;
18732 }
18733 else
18734 return false;
18735
18736 orig_out = operands[0];
18737 tmp = gen_reg_rtx (mode);
18738 operands[0] = tmp;
18739
18740 /* Recurse to get the constant loaded. */
18741 if (ix86_expand_int_movcc (operands) == 0)
18742 return false;
18743
18744 /* Mask in the interesting variable. */
18745 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18746 OPTAB_WIDEN);
18747 if (!rtx_equal_p (out, orig_out))
18748 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18749
18750 return true;
18751 }
18752
18753 /*
18754 * For comparison with above,
18755 *
18756 * movl cf,dest
18757 * movl ct,tmp
18758 * cmpl op1,op2
18759 * cmovcc tmp,dest
18760 *
18761 * Size 15.
18762 */
18763
18764 if (! nonimmediate_operand (operands[2], mode))
18765 operands[2] = force_reg (mode, operands[2]);
18766 if (! nonimmediate_operand (operands[3], mode))
18767 operands[3] = force_reg (mode, operands[3]);
18768
18769 if (! register_operand (operands[2], VOIDmode)
18770 && (mode == QImode
18771 || ! register_operand (operands[3], VOIDmode)))
18772 operands[2] = force_reg (mode, operands[2]);
18773
18774 if (mode == QImode
18775 && ! register_operand (operands[3], VOIDmode))
18776 operands[3] = force_reg (mode, operands[3]);
18777
18778 emit_insn (compare_seq);
18779 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18780 gen_rtx_IF_THEN_ELSE (mode,
18781 compare_op, operands[2],
18782 operands[3])));
18783 return true;
18784 }
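
/* Worked example of the lea-based constant path above (a sketch; the
   register choice is illustrative only): for dest = (a < b) ? 7 : 3 we
   have ct = 7, cf = 3, diff = 4, so the expansion is roughly

	xorl	dest, dest
	cmpl	op1, op0
	setl	dest_b
	leal	3(,dest,4), dest	; cf + dest * (ct - cf) selects 3 or 7
   */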
18785
18786 /* Swap, force into registers, or otherwise massage the two operands
18787 to an sse comparison with a mask result. Thus we differ a bit from
18788 ix86_prepare_fp_compare_args which expects to produce a flags result.
18789
18790 The DEST operand exists to help determine whether to commute commutative
18791 operators. The POP0/POP1 operands are updated in place. The new
18792 comparison code is returned, or UNKNOWN if not implementable. */
18793
18794 static enum rtx_code
18795 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18796 rtx *pop0, rtx *pop1)
18797 {
18798 rtx tmp;
18799
18800 switch (code)
18801 {
18802 case LTGT:
18803 case UNEQ:
18804 /* AVX supports all the needed comparisons. */
18805 if (TARGET_AVX)
18806 break;
18807 /* We have no LTGT as an operator. We could implement it with
18808 NE & ORDERED, but this requires an extra temporary. It's
18809 not clear that it's worth it. */
18810 return UNKNOWN;
18811
18812 case LT:
18813 case LE:
18814 case UNGT:
18815 case UNGE:
18816 /* These are supported directly. */
18817 break;
18818
18819 case EQ:
18820 case NE:
18821 case UNORDERED:
18822 case ORDERED:
18823 /* AVX has 3 operand comparisons, no need to swap anything. */
18824 if (TARGET_AVX)
18825 break;
18826 /* For commutative operators, try to canonicalize the destination
18827 operand to be first in the comparison - this helps reload to
18828 avoid extra moves. */
18829 if (!dest || !rtx_equal_p (dest, *pop1))
18830 break;
18831 /* FALLTHRU */
18832
18833 case GE:
18834 case GT:
18835 case UNLE:
18836 case UNLT:
18837 /* These are not supported directly before AVX, and furthermore
18838 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
18839 comparison operands to transform into something that is
18840 supported. */
18841 tmp = *pop0;
18842 *pop0 = *pop1;
18843 *pop1 = tmp;
18844 code = swap_condition (code);
18845 break;
18846
18847 default:
18848 gcc_unreachable ();
18849 }
18850
18851 return code;
18852 }
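
/* Example of the swap above (a sketch): before AVX the cmpps/cmpss
   predicates include LT and LE but no "compare greater", so a request
   for GT (a, b) is returned as LT with *POP0 and *POP1 exchanged, and
   the caller ends up emitting the LT pattern on (b, a).  */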
18853
18854 /* Detect conditional moves that exactly match min/max operational
18855 semantics. Note that this is IEEE safe, as long as we don't
18856 interchange the operands.
18857
18858 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18859 and TRUE if the operation is successful and instructions are emitted. */
18860
18861 static bool
18862 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18863 rtx cmp_op1, rtx if_true, rtx if_false)
18864 {
18865 enum machine_mode mode;
18866 bool is_min;
18867 rtx tmp;
18868
18869 if (code == LT)
18870 ;
18871 else if (code == UNGE)
18872 {
18873 tmp = if_true;
18874 if_true = if_false;
18875 if_false = tmp;
18876 }
18877 else
18878 return false;
18879
18880 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18881 is_min = true;
18882 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18883 is_min = false;
18884 else
18885 return false;
18886
18887 mode = GET_MODE (dest);
18888
18889 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18890 but MODE may be a vector mode and thus not appropriate. */
18891 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18892 {
18893 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18894 rtvec v;
18895
18896 if_true = force_reg (mode, if_true);
18897 v = gen_rtvec (2, if_true, if_false);
18898 tmp = gen_rtx_UNSPEC (mode, v, u);
18899 }
18900 else
18901 {
18902 code = is_min ? SMIN : SMAX;
18903 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18904 }
18905
18906 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18907 return true;
18908 }
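
/* Example of a conditional move this matches (a sketch): with both
   flag_finite_math_only and flag_unsafe_math_optimizations set (as with
   -ffast-math), dest = (a < b) ? a : b becomes a plain SMIN, i.e.
   minss/minsd (minps/minpd for vectors), and (a < b) ? b : a becomes
   SMAX.  Without those flags the UNSPEC_IEEE_MIN/MAX patterns are used
   so the operand order, and hence the NaN behaviour of min/max, is
   preserved.  */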
18909
18910 /* Expand an sse vector comparison. Return the register with the result. */
18911
18912 static rtx
18913 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18914 rtx op_true, rtx op_false)
18915 {
18916 enum machine_mode mode = GET_MODE (dest);
18917 enum machine_mode cmp_mode = GET_MODE (cmp_op0);
18918 rtx x;
18919
18920 cmp_op0 = force_reg (cmp_mode, cmp_op0);
18921 if (!nonimmediate_operand (cmp_op1, cmp_mode))
18922 cmp_op1 = force_reg (cmp_mode, cmp_op1);
18923
18924 if (optimize
18925 || reg_overlap_mentioned_p (dest, op_true)
18926 || reg_overlap_mentioned_p (dest, op_false))
18927 dest = gen_reg_rtx (mode);
18928
18929 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
18930 if (cmp_mode != mode)
18931 {
18932 x = force_reg (cmp_mode, x);
18933 convert_move (dest, x, false);
18934 }
18935 else
18936 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18937
18938 return dest;
18939 }
18940
18941 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18942 operations. This is used for both scalar and vector conditional moves. */
18943
18944 static void
18945 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18946 {
18947 enum machine_mode mode = GET_MODE (dest);
18948 rtx t2, t3, x;
18949
18950 if (vector_all_ones_operand (op_true, mode)
18951 && rtx_equal_p (op_false, CONST0_RTX (mode)))
18952 {
18953 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
18954 }
18955 else if (op_false == CONST0_RTX (mode))
18956 {
18957 op_true = force_reg (mode, op_true);
18958 x = gen_rtx_AND (mode, cmp, op_true);
18959 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18960 }
18961 else if (op_true == CONST0_RTX (mode))
18962 {
18963 op_false = force_reg (mode, op_false);
18964 x = gen_rtx_NOT (mode, cmp);
18965 x = gen_rtx_AND (mode, x, op_false);
18966 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18967 }
18968 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
18969 {
18970 op_false = force_reg (mode, op_false);
18971 x = gen_rtx_IOR (mode, cmp, op_false);
18972 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18973 }
18974 else if (TARGET_XOP)
18975 {
18976 op_true = force_reg (mode, op_true);
18977
18978 if (!nonimmediate_operand (op_false, mode))
18979 op_false = force_reg (mode, op_false);
18980
18981 emit_insn (gen_rtx_SET (mode, dest,
18982 gen_rtx_IF_THEN_ELSE (mode, cmp,
18983 op_true,
18984 op_false)));
18985 }
18986 else
18987 {
18988 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
18989
18990 if (!nonimmediate_operand (op_true, mode))
18991 op_true = force_reg (mode, op_true);
18992
18993 op_false = force_reg (mode, op_false);
18994
18995 switch (mode)
18996 {
18997 case V4SFmode:
18998 if (TARGET_SSE4_1)
18999 gen = gen_sse4_1_blendvps;
19000 break;
19001 case V2DFmode:
19002 if (TARGET_SSE4_1)
19003 gen = gen_sse4_1_blendvpd;
19004 break;
19005 case V16QImode:
19006 case V8HImode:
19007 case V4SImode:
19008 case V2DImode:
19009 if (TARGET_SSE4_1)
19010 {
19011 gen = gen_sse4_1_pblendvb;
19012 dest = gen_lowpart (V16QImode, dest);
19013 op_false = gen_lowpart (V16QImode, op_false);
19014 op_true = gen_lowpart (V16QImode, op_true);
19015 cmp = gen_lowpart (V16QImode, cmp);
19016 }
19017 break;
19018 case V8SFmode:
19019 if (TARGET_AVX)
19020 gen = gen_avx_blendvps256;
19021 break;
19022 case V4DFmode:
19023 if (TARGET_AVX)
19024 gen = gen_avx_blendvpd256;
19025 break;
19026 case V32QImode:
19027 case V16HImode:
19028 case V8SImode:
19029 case V4DImode:
19030 if (TARGET_AVX2)
19031 {
19032 gen = gen_avx2_pblendvb;
19033 dest = gen_lowpart (V32QImode, dest);
19034 op_false = gen_lowpart (V32QImode, op_false);
19035 op_true = gen_lowpart (V32QImode, op_true);
19036 cmp = gen_lowpart (V32QImode, cmp);
19037 }
19038 break;
19039 default:
19040 break;
19041 }
19042
19043 if (gen != NULL)
19044 emit_insn (gen (dest, op_false, op_true, cmp));
19045 else
19046 {
19047 op_true = force_reg (mode, op_true);
19048
19049 t2 = gen_reg_rtx (mode);
19050 if (optimize)
19051 t3 = gen_reg_rtx (mode);
19052 else
19053 t3 = dest;
19054
19055 x = gen_rtx_AND (mode, op_true, cmp);
19056 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
19057
19058 x = gen_rtx_NOT (mode, cmp);
19059 x = gen_rtx_AND (mode, x, op_false);
19060 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
19061
19062 x = gen_rtx_IOR (mode, t3, t2);
19063 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19064 }
19065 }
19066 }
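
/* Fallback blend illustrated (a sketch; temporaries are named only for
   the example): without SSE4.1 blendv or XOP, the last branch above
   computes

	t2   = op_true  & cmp
	t3   = op_false & ~cmp
	dest = t3 | t2

   which relies on CMP being an all-ones / all-zeros mask per element,
   as produced by ix86_expand_sse_cmp.  */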
19067
19068 /* Expand a floating-point conditional move. Return true if successful. */
19069
19070 bool
19071 ix86_expand_fp_movcc (rtx operands[])
19072 {
19073 enum machine_mode mode = GET_MODE (operands[0]);
19074 enum rtx_code code = GET_CODE (operands[1]);
19075 rtx tmp, compare_op;
19076 rtx op0 = XEXP (operands[1], 0);
19077 rtx op1 = XEXP (operands[1], 1);
19078
19079 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19080 {
19081 enum machine_mode cmode;
19082
19083 /* Since we've no cmove for sse registers, don't force bad register
19084 allocation just to gain access to it. Deny movcc when the
19085 comparison mode doesn't match the move mode. */
19086 cmode = GET_MODE (op0);
19087 if (cmode == VOIDmode)
19088 cmode = GET_MODE (op1);
19089 if (cmode != mode)
19090 return false;
19091
19092 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
19093 if (code == UNKNOWN)
19094 return false;
19095
19096 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
19097 operands[2], operands[3]))
19098 return true;
19099
19100 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
19101 operands[2], operands[3]);
19102 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
19103 return true;
19104 }
19105
19106 /* The floating point conditional move instructions don't directly
19107 support conditions resulting from a signed integer comparison. */
19108
19109 compare_op = ix86_expand_compare (code, op0, op1);
19110 if (!fcmov_comparison_operator (compare_op, VOIDmode))
19111 {
19112 tmp = gen_reg_rtx (QImode);
19113 ix86_expand_setcc (tmp, code, op0, op1);
19114
19115 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
19116 }
19117
19118 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19119 gen_rtx_IF_THEN_ELSE (mode, compare_op,
19120 operands[2], operands[3])));
19121
19122 return true;
19123 }
19124
19125 /* Expand a floating-point vector conditional move; a vcond operation
19126 rather than a movcc operation. */
19127
19128 bool
19129 ix86_expand_fp_vcond (rtx operands[])
19130 {
19131 enum rtx_code code = GET_CODE (operands[3]);
19132 rtx cmp;
19133
19134 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
19135 &operands[4], &operands[5]);
19136 if (code == UNKNOWN)
19137 {
19138 rtx temp;
19139 switch (GET_CODE (operands[3]))
19140 {
19141 case LTGT:
19142 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
19143 operands[5], operands[0], operands[0]);
19144 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
19145 operands[5], operands[1], operands[2]);
19146 code = AND;
19147 break;
19148 case UNEQ:
19149 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
19150 operands[5], operands[0], operands[0]);
19151 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
19152 operands[5], operands[1], operands[2]);
19153 code = IOR;
19154 break;
19155 default:
19156 gcc_unreachable ();
19157 }
19158 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
19159 OPTAB_DIRECT);
19160 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19161 return true;
19162 }
19163
19164 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
19165 operands[5], operands[1], operands[2]))
19166 return true;
19167
19168 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
19169 operands[1], operands[2]);
19170 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19171 return true;
19172 }
19173
19174 /* Expand a signed/unsigned integral vector conditional move. */
19175
19176 bool
19177 ix86_expand_int_vcond (rtx operands[])
19178 {
19179 enum machine_mode data_mode = GET_MODE (operands[0]);
19180 enum machine_mode mode = GET_MODE (operands[4]);
19181 enum rtx_code code = GET_CODE (operands[3]);
19182 bool negate = false;
19183 rtx x, cop0, cop1;
19184
19185 cop0 = operands[4];
19186 cop1 = operands[5];
19187
19188 /* XOP supports all of the comparisons on all vector int types. */
19189 if (!TARGET_XOP)
19190 {
19191 /* Canonicalize the comparison to EQ, GT, GTU. */
19192 switch (code)
19193 {
19194 case EQ:
19195 case GT:
19196 case GTU:
19197 break;
19198
19199 case NE:
19200 case LE:
19201 case LEU:
19202 code = reverse_condition (code);
19203 negate = true;
19204 break;
19205
19206 case GE:
19207 case GEU:
19208 code = reverse_condition (code);
19209 negate = true;
19210 /* FALLTHRU */
19211
19212 case LT:
19213 case LTU:
19214 code = swap_condition (code);
19215 x = cop0, cop0 = cop1, cop1 = x;
19216 break;
19217
19218 default:
19219 gcc_unreachable ();
19220 }
19221
19222 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19223 if (mode == V2DImode)
19224 {
19225 switch (code)
19226 {
19227 case EQ:
19228 /* SSE4.1 supports EQ. */
19229 if (!TARGET_SSE4_1)
19230 return false;
19231 break;
19232
19233 case GT:
19234 case GTU:
19235 /* SSE4.2 supports GT/GTU. */
19236 if (!TARGET_SSE4_2)
19237 return false;
19238 break;
19239
19240 default:
19241 gcc_unreachable ();
19242 }
19243 }
19244
19245 /* Unsigned parallel compare is not supported by the hardware.
19246 Play some tricks to turn this into a signed comparison
19247 instead.  */
19248 if (code == GTU)
19249 {
19250 cop0 = force_reg (mode, cop0);
19251
19252 switch (mode)
19253 {
19254 case V8SImode:
19255 case V4DImode:
19256 case V4SImode:
19257 case V2DImode:
19258 {
19259 rtx t1, t2, mask;
19260 rtx (*gen_sub3) (rtx, rtx, rtx);
19261
19262 switch (mode)
19263 {
19264 case V8SImode: gen_sub3 = gen_subv8si3; break;
19265 case V4DImode: gen_sub3 = gen_subv4di3; break;
19266 case V4SImode: gen_sub3 = gen_subv4si3; break;
19267 case V2DImode: gen_sub3 = gen_subv2di3; break;
19268 default:
19269 gcc_unreachable ();
19270 }
19271 /* Subtract (-(INT MAX) - 1) from both operands to make
19272 them signed. */
19273 mask = ix86_build_signbit_mask (mode, true, false);
19274 t1 = gen_reg_rtx (mode);
19275 emit_insn (gen_sub3 (t1, cop0, mask));
19276
19277 t2 = gen_reg_rtx (mode);
19278 emit_insn (gen_sub3 (t2, cop1, mask));
19279
19280 cop0 = t1;
19281 cop1 = t2;
19282 code = GT;
19283 }
19284 break;
19285
19286 case V32QImode:
19287 case V16HImode:
19288 case V16QImode:
19289 case V8HImode:
19290 /* Perform a parallel unsigned saturating subtraction. */
19291 x = gen_reg_rtx (mode);
19292 emit_insn (gen_rtx_SET (VOIDmode, x,
19293 gen_rtx_US_MINUS (mode, cop0, cop1)));
19294
19295 cop0 = x;
19296 cop1 = CONST0_RTX (mode);
19297 code = EQ;
19298 negate = !negate;
19299 break;
19300
19301 default:
19302 gcc_unreachable ();
19303 }
19304 }
19305 }
19306
19307 /* Allow the comparison to be done in one mode, but the movcc to
19308 happen in another mode. */
19309 if (data_mode == mode)
19310 {
19311 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19312 operands[1+negate], operands[2-negate]);
19313 }
19314 else
19315 {
19316 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
19317 x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
19318 code, cop0, cop1,
19319 operands[1+negate], operands[2-negate]);
19320 x = gen_lowpart (data_mode, x);
19321 }
19322
19323 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19324 operands[2-negate]);
19325 return true;
19326 }
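
/* Worked example of the unsigned-compare trick above (a sketch): for
   V4SImode, GTU is rewritten by subtracting 0x80000000 from both
   operands (the ix86_build_signbit_mask vector) and using signed GT.
   E.g. with a = 0xFFFFFFFF and b = 1, a GTU b holds; after the
   subtraction a' = 0x7FFFFFFF and b' = 0x80000001 (signed -2147483647),
   and a' GT b' also holds, so pcmpgtd produces the right mask.  */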
19327
19328 void
19329 ix86_expand_vshuffle (rtx operands[])
19330 {
19331 rtx target = operands[0];
19332 rtx op0 = operands[1];
19333 rtx op1 = operands[2];
19334 rtx mask = operands[3];
19335 rtx t1, t2, vt, vec[16];
19336 enum machine_mode mode = GET_MODE (op0);
19337 enum machine_mode maskmode = GET_MODE (mask);
19338 int w, e, i;
19339 bool one_operand_shuffle = rtx_equal_p (op0, op1);
19340
19341 /* Number of elements in the vector. */
19342 w = GET_MODE_NUNITS (mode);
19343 e = GET_MODE_UNIT_SIZE (mode);
19344 gcc_assert (w <= 16);
19345
19346 if (TARGET_AVX2)
19347 {
19348 if (mode == V4DImode || mode == V4DFmode)
19349 {
19350 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
19351 a constant shuffle operand.  With a tiny bit of effort we can
19352 use VPERMD instead. A re-interpretation stall for V4DFmode is
19353 unfortunate but there's no avoiding it. */
19354 t1 = gen_reg_rtx (V8SImode);
19355
19356 /* Replicate the low bits of the V4DImode mask into V8SImode:
19357 mask = { A B C D }
19358 t1 = { A A B B C C D D }. */
19359 for (i = 0; i < 4; ++i)
19360 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
19361 vt = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, vec));
19362 vt = force_reg (V8SImode, vt);
19363 mask = gen_lowpart (V8SImode, mask);
19364 emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
19365
19366 /* Multiply the shuffle indices by two.  */
19367 emit_insn (gen_avx2_lshlv8si3 (t1, t1, const1_rtx));
19368
19369 /* Add one to the odd shuffle indices:
19370 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
19371 for (i = 0; i < 4; ++i)
19372 {
19373 vec[i * 2] = const0_rtx;
19374 vec[i * 2 + 1] = const1_rtx;
19375 }
19376 vt = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, vec));
19377 vt = force_const_mem (V8SImode, vt);
19378 emit_insn (gen_addv8si3 (t1, t1, vt));
19379
19380 /* Continue as if V8SImode was used initially. */
19381 operands[3] = mask = t1;
19382 target = gen_lowpart (V8SImode, target);
19383 op0 = gen_lowpart (V8SImode, op0);
19384 op1 = gen_lowpart (V8SImode, op1);
19385 maskmode = mode = V8SImode;
19386 w = 8;
19387 e = 4;
19388 }
19389
19390 switch (mode)
19391 {
19392 case V8SImode:
19393 /* The VPERMD and VPERMPS instructions already properly ignore
19394 the high bits of the shuffle elements. No need for us to
19395 perform an AND ourselves. */
19396 if (one_operand_shuffle)
19397 emit_insn (gen_avx2_permvarv8si (target, mask, op0));
19398 else
19399 {
19400 t1 = gen_reg_rtx (V8SImode);
19401 t2 = gen_reg_rtx (V8SImode);
19402 emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
19403 emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
19404 goto merge_two;
19405 }
19406 return;
19407
19408 case V8SFmode:
19409 mask = gen_lowpart (V8SFmode, mask);
19410 if (one_operand_shuffle)
19411 emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
19412 else
19413 {
19414 t1 = gen_reg_rtx (V8SFmode);
19415 t2 = gen_reg_rtx (V8SFmode);
19416 emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
19417 emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
19418 goto merge_two;
19419 }
19420 return;
19421
19422 case V4SImode:
19423 /* By combining the two 128-bit input vectors into one 256-bit
19424 input vector, we can use VPERMD and VPERMPS for the full
19425 two-operand shuffle. */
19426 t1 = gen_reg_rtx (V8SImode);
19427 t2 = gen_reg_rtx (V8SImode);
19428 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
19429 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
19430 emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
19431 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
19432 return;
19433
19434 case V4SFmode:
19435 t1 = gen_reg_rtx (V8SFmode);
19436 t2 = gen_reg_rtx (V8SFmode);
19437 mask = gen_lowpart (V4SFmode, mask);
19438 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
19439 emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
19440 emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
19441 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
19442 return;
19443
19444 default:
19445 gcc_assert (GET_MODE_SIZE (mode) <= 16);
19446 break;
19447 }
19448 }
19449
19450 if (TARGET_XOP)
19451 {
19452 /* The XOP VPPERM insn supports three inputs. By ignoring the
19453 one_operand_shuffle special case, we avoid creating another
19454 set of constant vectors in memory. */
19455 one_operand_shuffle = false;
19456
19457 /* mask = mask & {2*w-1, ...} */
19458 vt = GEN_INT (2*w - 1);
19459 }
19460 else
19461 {
19462 /* mask = mask & {w-1, ...} */
19463 vt = GEN_INT (w - 1);
19464 }
19465
19466 for (i = 0; i < w; i++)
19467 vec[i] = vt;
19468 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19469 mask = expand_simple_binop (maskmode, AND, mask, vt,
19470 NULL_RTX, 0, OPTAB_DIRECT);
19471
19472 /* For non-QImode operations, convert the word permutation control
19473 into a byte permutation control. */
19474 if (mode != V16QImode)
19475 {
19476 mask = expand_simple_binop (maskmode, ASHIFT, mask,
19477 GEN_INT (exact_log2 (e)),
19478 NULL_RTX, 0, OPTAB_DIRECT);
19479
19480 /* Convert mask to vector of chars. */
19481 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
19482
19483 /* Replicate each of the input bytes into byte positions:
19484 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
19485 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
19486 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
19487 for (i = 0; i < 16; ++i)
19488 vec[i] = GEN_INT (i/e * e);
19489 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
19490 vt = force_const_mem (V16QImode, vt);
19491 if (TARGET_XOP)
19492 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
19493 else
19494 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
19495
19496 /* Convert it into the byte positions by doing
19497 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
19498 for (i = 0; i < 16; ++i)
19499 vec[i] = GEN_INT (i % e);
19500 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
19501 vt = force_const_mem (V16QImode, vt);
19502 emit_insn (gen_addv16qi3 (mask, mask, vt));
19503 }
19504
19505 /* The actual shuffle operations all operate on V16QImode. */
19506 op0 = gen_lowpart (V16QImode, op0);
19507 op1 = gen_lowpart (V16QImode, op1);
19508 target = gen_lowpart (V16QImode, target);
19509
19510 if (TARGET_XOP)
19511 {
19512 emit_insn (gen_xop_pperm (target, op0, op1, mask));
19513 }
19514 else if (one_operand_shuffle)
19515 {
19516 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
19517 }
19518 else
19519 {
19520 rtx xops[6];
19521 bool ok;
19522
19523 /* Shuffle the two input vectors independently. */
19524 t1 = gen_reg_rtx (V16QImode);
19525 t2 = gen_reg_rtx (V16QImode);
19526 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
19527 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
19528
19529 merge_two:
19530 /* Then merge them together. The key is whether any given control
19531 element contained a bit set that indicates the second word. */
19532 mask = operands[3];
19533 vt = GEN_INT (w);
19534 if (maskmode == V2DImode && !TARGET_SSE4_1)
19535 {
19536 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
19537 more shuffle to convert the V2DI input mask into a V4SI
19538 input mask, at which point the masking done by ix86_expand_int_vcond
19539 will work as desired.  */
19540 rtx t3 = gen_reg_rtx (V4SImode);
19541 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
19542 const0_rtx, const0_rtx,
19543 const2_rtx, const2_rtx));
19544 mask = t3;
19545 maskmode = V4SImode;
19546 e = w = 4;
19547 }
19548
19549 for (i = 0; i < w; i++)
19550 vec[i] = vt;
19551 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19552 vt = force_reg (maskmode, vt);
19553 mask = expand_simple_binop (maskmode, AND, mask, vt,
19554 NULL_RTX, 0, OPTAB_DIRECT);
19555
19556 xops[0] = operands[0];
19557 xops[1] = gen_lowpart (mode, t2);
19558 xops[2] = gen_lowpart (mode, t1);
19559 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
19560 xops[4] = mask;
19561 xops[5] = vt;
19562 ok = ix86_expand_int_vcond (xops);
19563 gcc_assert (ok);
19564 }
19565 }
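
/* Worked example of the control conversion above (a sketch, on the
   SSSE3 pshufb path, assuming a little-endian byte layout): for a
   V4SImode shuffle e = 4, so a word index of 2 is first shifted left by
   log2(4) to give byte 8, then replicated into the four byte slots of
   that element by the pshufb with the {0,0,0,0, 4,4,4,4, ...} control,
   and finally {0,1,2,3} is added, yielding the byte selectors
   {8,9,10,11} that pshufb expects.  */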
19566
19567 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
19568 true if we should do zero extension, else sign extension. HIGH_P is
19569 true if we want the N/2 high elements, else the low elements. */
19570
19571 void
19572 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
19573 {
19574 enum machine_mode imode = GET_MODE (operands[1]);
19575 rtx tmp, dest;
19576
19577 if (TARGET_SSE4_1)
19578 {
19579 rtx (*unpack)(rtx, rtx);
19580
19581 switch (imode)
19582 {
19583 case V16QImode:
19584 if (unsigned_p)
19585 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
19586 else
19587 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
19588 break;
19589 case V8HImode:
19590 if (unsigned_p)
19591 unpack = gen_sse4_1_zero_extendv4hiv4si2;
19592 else
19593 unpack = gen_sse4_1_sign_extendv4hiv4si2;
19594 break;
19595 case V4SImode:
19596 if (unsigned_p)
19597 unpack = gen_sse4_1_zero_extendv2siv2di2;
19598 else
19599 unpack = gen_sse4_1_sign_extendv2siv2di2;
19600 break;
19601 default:
19602 gcc_unreachable ();
19603 }
19604
19605 if (high_p)
19606 {
19607 /* Shift higher 8 bytes to lower 8 bytes. */
19608 tmp = gen_reg_rtx (imode);
19609 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
19610 gen_lowpart (V1TImode, operands[1]),
19611 GEN_INT (64)));
19612 }
19613 else
19614 tmp = operands[1];
19615
19616 emit_insn (unpack (operands[0], tmp));
19617 }
19618 else
19619 {
19620 rtx (*unpack)(rtx, rtx, rtx);
19621
19622 switch (imode)
19623 {
19624 case V16QImode:
19625 if (high_p)
19626 unpack = gen_vec_interleave_highv16qi;
19627 else
19628 unpack = gen_vec_interleave_lowv16qi;
19629 break;
19630 case V8HImode:
19631 if (high_p)
19632 unpack = gen_vec_interleave_highv8hi;
19633 else
19634 unpack = gen_vec_interleave_lowv8hi;
19635 break;
19636 case V4SImode:
19637 if (high_p)
19638 unpack = gen_vec_interleave_highv4si;
19639 else
19640 unpack = gen_vec_interleave_lowv4si;
19641 break;
19642 default:
19643 gcc_unreachable ();
19644 }
19645
19646 dest = gen_lowpart (imode, operands[0]);
19647
19648 if (unsigned_p)
19649 tmp = force_reg (imode, CONST0_RTX (imode));
19650 else
19651 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
19652 operands[1], pc_rtx, pc_rtx);
19653
19654 emit_insn (unpack (dest, operands[1], tmp));
19655 }
19656 }
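
/* Sketch of the non-SSE4.1 signed path above (register spelling is
   illustrative only): to sign-extend the low V8HImode elements to
   V4SImode, a compare 0 > op1 builds a per-element sign mask and the
   low interleave pairs each element with that mask:

	pxor	  t, t
	pcmpgtw	  a, t		; t = (0 > a) per element, i.e. the sign mask
	punpcklwd t, a		; a = { a0, t0, a1, t1, ... }

   For zero extension the mask is simply a zero register.  */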
19657
19658 /* Expand conditional increment or decrement using adc/sbb instructions.
19659 The default case using setcc followed by the conditional move can be
19660 done by generic code. */
19661 bool
19662 ix86_expand_int_addcc (rtx operands[])
19663 {
19664 enum rtx_code code = GET_CODE (operands[1]);
19665 rtx flags;
19666 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
19667 rtx compare_op;
19668 rtx val = const0_rtx;
19669 bool fpcmp = false;
19670 enum machine_mode mode;
19671 rtx op0 = XEXP (operands[1], 0);
19672 rtx op1 = XEXP (operands[1], 1);
19673
19674 if (operands[3] != const1_rtx
19675 && operands[3] != constm1_rtx)
19676 return false;
19677 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19678 return false;
19679 code = GET_CODE (compare_op);
19680
19681 flags = XEXP (compare_op, 0);
19682
19683 if (GET_MODE (flags) == CCFPmode
19684 || GET_MODE (flags) == CCFPUmode)
19685 {
19686 fpcmp = true;
19687 code = ix86_fp_compare_code_to_integer (code);
19688 }
19689
19690 if (code != LTU)
19691 {
19692 val = constm1_rtx;
19693 if (fpcmp)
19694 PUT_CODE (compare_op,
19695 reverse_condition_maybe_unordered
19696 (GET_CODE (compare_op)));
19697 else
19698 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
19699 }
19700
19701 mode = GET_MODE (operands[0]);
19702
19703 /* Construct either adc or sbb insn. */
19704 if ((code == LTU) == (operands[3] == constm1_rtx))
19705 {
19706 switch (mode)
19707 {
19708 case QImode:
19709 insn = gen_subqi3_carry;
19710 break;
19711 case HImode:
19712 insn = gen_subhi3_carry;
19713 break;
19714 case SImode:
19715 insn = gen_subsi3_carry;
19716 break;
19717 case DImode:
19718 insn = gen_subdi3_carry;
19719 break;
19720 default:
19721 gcc_unreachable ();
19722 }
19723 }
19724 else
19725 {
19726 switch (mode)
19727 {
19728 case QImode:
19729 insn = gen_addqi3_carry;
19730 break;
19731 case HImode:
19732 insn = gen_addhi3_carry;
19733 break;
19734 case SImode:
19735 insn = gen_addsi3_carry;
19736 break;
19737 case DImode:
19738 insn = gen_adddi3_carry;
19739 break;
19740 default:
19741 gcc_unreachable ();
19742 }
19743 }
19744 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
19745
19746 return true;
19747 }
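
/* Sketch of what this enables (registers are examples only): for
   x = x + (a < b) with unsigned operands, the comparison sets the carry
   flag and the conditional increment folds into adc:

	cmpl	b, a		; CF = (a < b) unsigned
	adcl	$0, x		; x += CF

   A conditional decrement uses sbb in the same way.  */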
19748
19749
19750 /* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
19751 but works for floating point parameters and non-offsettable memories.
19752 For pushes, it returns just stack offsets; the values will be saved
19753 in the right order.  At most four parts are generated.  */
19754
19755 static int
19756 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
19757 {
19758 int size;
19759
19760 if (!TARGET_64BIT)
19761 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
19762 else
19763 size = (GET_MODE_SIZE (mode) + 4) / 8;
19764
19765 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
19766 gcc_assert (size >= 2 && size <= 4);
19767
19768 /* Optimize constant pool references to immediates.  This is used by fp
19769 moves, which force all constants to memory to allow combining.  */
19770 if (MEM_P (operand) && MEM_READONLY_P (operand))
19771 {
19772 rtx tmp = maybe_get_pool_constant (operand);
19773 if (tmp)
19774 operand = tmp;
19775 }
19776
19777 if (MEM_P (operand) && !offsettable_memref_p (operand))
19778 {
19779 /* The only non-offsettable memories we handle are pushes.  */
19780 int ok = push_operand (operand, VOIDmode);
19781
19782 gcc_assert (ok);
19783
19784 operand = copy_rtx (operand);
19785 PUT_MODE (operand, Pmode);
19786 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19787 return size;
19788 }
19789
19790 if (GET_CODE (operand) == CONST_VECTOR)
19791 {
19792 enum machine_mode imode = int_mode_for_mode (mode);
19793 /* Caution: if we looked through a constant pool memory above,
19794 the operand may actually have a different mode now. That's
19795 ok, since we want to pun this all the way back to an integer. */
19796 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19797 gcc_assert (operand != NULL);
19798 mode = imode;
19799 }
19800
19801 if (!TARGET_64BIT)
19802 {
19803 if (mode == DImode)
19804 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19805 else
19806 {
19807 int i;
19808
19809 if (REG_P (operand))
19810 {
19811 gcc_assert (reload_completed);
19812 for (i = 0; i < size; i++)
19813 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19814 }
19815 else if (offsettable_memref_p (operand))
19816 {
19817 operand = adjust_address (operand, SImode, 0);
19818 parts[0] = operand;
19819 for (i = 1; i < size; i++)
19820 parts[i] = adjust_address (operand, SImode, 4 * i);
19821 }
19822 else if (GET_CODE (operand) == CONST_DOUBLE)
19823 {
19824 REAL_VALUE_TYPE r;
19825 long l[4];
19826
19827 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19828 switch (mode)
19829 {
19830 case TFmode:
19831 real_to_target (l, &r, mode);
19832 parts[3] = gen_int_mode (l[3], SImode);
19833 parts[2] = gen_int_mode (l[2], SImode);
19834 break;
19835 case XFmode:
19836 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19837 parts[2] = gen_int_mode (l[2], SImode);
19838 break;
19839 case DFmode:
19840 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19841 break;
19842 default:
19843 gcc_unreachable ();
19844 }
19845 parts[1] = gen_int_mode (l[1], SImode);
19846 parts[0] = gen_int_mode (l[0], SImode);
19847 }
19848 else
19849 gcc_unreachable ();
19850 }
19851 }
19852 else
19853 {
19854 if (mode == TImode)
19855 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19856 if (mode == XFmode || mode == TFmode)
19857 {
19858 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
19859 if (REG_P (operand))
19860 {
19861 gcc_assert (reload_completed);
19862 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19863 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19864 }
19865 else if (offsettable_memref_p (operand))
19866 {
19867 operand = adjust_address (operand, DImode, 0);
19868 parts[0] = operand;
19869 parts[1] = adjust_address (operand, upper_mode, 8);
19870 }
19871 else if (GET_CODE (operand) == CONST_DOUBLE)
19872 {
19873 REAL_VALUE_TYPE r;
19874 long l[4];
19875
19876 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19877 real_to_target (l, &r, mode);
19878
19879 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19880 if (HOST_BITS_PER_WIDE_INT >= 64)
19881 parts[0]
19882 = gen_int_mode
19883 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19884 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19885 DImode);
19886 else
19887 parts[0] = immed_double_const (l[0], l[1], DImode);
19888
19889 if (upper_mode == SImode)
19890 parts[1] = gen_int_mode (l[2], SImode);
19891 else if (HOST_BITS_PER_WIDE_INT >= 64)
19892 parts[1]
19893 = gen_int_mode
19894 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19895 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19896 DImode);
19897 else
19898 parts[1] = immed_double_const (l[2], l[3], DImode);
19899 }
19900 else
19901 gcc_unreachable ();
19902 }
19903 }
19904
19905 return size;
19906 }
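
/* Example of the CONST_DOUBLE handling above (a sketch): on a 32-bit
   target a DFmode constant such as 1.0 is punned to its IEEE-754 image
   0x3FF0000000000000 and returned as two SImode immediates,
   parts[0] = 0x00000000 (low word) and parts[1] = 0x3FF00000 (high
   word), so the caller can emit two plain SImode moves.  */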
19907
19908 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19909 All required insns are emitted here; nothing is returned.  Operand
19910 slots starting at 2 hold the destination parts and slots starting
19911 at 6 hold the source parts, in the correct order.  */
19912
19913 void
19914 ix86_split_long_move (rtx operands[])
19915 {
19916 rtx part[2][4];
19917 int nparts, i, j;
19918 int push = 0;
19919 int collisions = 0;
19920 enum machine_mode mode = GET_MODE (operands[0]);
19921 bool collisionparts[4];
19922
19923 /* The DFmode expanders may ask us to move a double.
19924 For a 64-bit target this is a single move.  By hiding that fact
19925 here we simplify the i386.md splitters.  */
19926 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19927 {
19928 /* Optimize constant pool references to immediates.  This is used by
19929 fp moves, which force all constants to memory to allow combining.  */
19930
19931 if (MEM_P (operands[1])
19932 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19933 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19934 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19935 if (push_operand (operands[0], VOIDmode))
19936 {
19937 operands[0] = copy_rtx (operands[0]);
19938 PUT_MODE (operands[0], Pmode);
19939 }
19940 else
19941 operands[0] = gen_lowpart (DImode, operands[0]);
19942 operands[1] = gen_lowpart (DImode, operands[1]);
19943 emit_move_insn (operands[0], operands[1]);
19944 return;
19945 }
19946
19947 /* The only non-offsettable memory we handle is push. */
19948 if (push_operand (operands[0], VOIDmode))
19949 push = 1;
19950 else
19951 gcc_assert (!MEM_P (operands[0])
19952 || offsettable_memref_p (operands[0]));
19953
19954 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19955 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19956
19957 /* When emitting a push, be careful with source operands on the stack.  */
19958 if (push && MEM_P (operands[1])
19959 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19960 {
19961 rtx src_base = XEXP (part[1][nparts - 1], 0);
19962
19963 /* Compensate for the stack decrement by 4. */
19964 if (!TARGET_64BIT && nparts == 3
19965 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19966 src_base = plus_constant (src_base, 4);
19967
19968 /* src_base refers to the stack pointer and is
19969 automatically decreased by emitted push. */
19970 for (i = 0; i < nparts; i++)
19971 part[1][i] = change_address (part[1][i],
19972 GET_MODE (part[1][i]), src_base);
19973 }
19974
19975 /* We need to do the copy in the right order in case an address register
19976 of the source overlaps the destination.  */
19977 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19978 {
19979 rtx tmp;
19980
19981 for (i = 0; i < nparts; i++)
19982 {
19983 collisionparts[i]
19984 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19985 if (collisionparts[i])
19986 collisions++;
19987 }
19988
19989 /* Collision in the middle part can be handled by reordering. */
19990 if (collisions == 1 && nparts == 3 && collisionparts [1])
19991 {
19992 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19993 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19994 }
19995 else if (collisions == 1
19996 && nparts == 4
19997 && (collisionparts [1] || collisionparts [2]))
19998 {
19999 if (collisionparts [1])
20000 {
20001 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20002 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20003 }
20004 else
20005 {
20006 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
20007 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
20008 }
20009 }
20010
20011 /* If there are more collisions, we can't handle it by reordering.
20012 Do an lea to the last part and use only one colliding move. */
20013 else if (collisions > 1)
20014 {
20015 rtx base;
20016
20017 collisions = 1;
20018
20019 base = part[0][nparts - 1];
20020
20021 /* Handle the case when the last part isn't valid for lea.
20022 Happens in 64-bit mode storing the 12-byte XFmode. */
20023 if (GET_MODE (base) != Pmode)
20024 base = gen_rtx_REG (Pmode, REGNO (base));
20025
20026 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
20027 part[1][0] = replace_equiv_address (part[1][0], base);
20028 for (i = 1; i < nparts; i++)
20029 {
20030 tmp = plus_constant (base, UNITS_PER_WORD * i);
20031 part[1][i] = replace_equiv_address (part[1][i], tmp);
20032 }
20033 }
20034 }
20035
20036 if (push)
20037 {
20038 if (!TARGET_64BIT)
20039 {
20040 if (nparts == 3)
20041 {
20042 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
20043 emit_insn (gen_addsi3 (stack_pointer_rtx,
20044 stack_pointer_rtx, GEN_INT (-4)));
20045 emit_move_insn (part[0][2], part[1][2]);
20046 }
20047 else if (nparts == 4)
20048 {
20049 emit_move_insn (part[0][3], part[1][3]);
20050 emit_move_insn (part[0][2], part[1][2]);
20051 }
20052 }
20053 else
20054 {
20055 /* In 64-bit mode we don't have a 32-bit push available.  In case this is
20056 a register, that is OK - we will just use the larger counterpart.  We
20057 also retype memory - this comes from an attempt to avoid the REX prefix
20058 on moving the second half of a TFmode value.  */
20059 if (GET_MODE (part[1][1]) == SImode)
20060 {
20061 switch (GET_CODE (part[1][1]))
20062 {
20063 case MEM:
20064 part[1][1] = adjust_address (part[1][1], DImode, 0);
20065 break;
20066
20067 case REG:
20068 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
20069 break;
20070
20071 default:
20072 gcc_unreachable ();
20073 }
20074
20075 if (GET_MODE (part[1][0]) == SImode)
20076 part[1][0] = part[1][1];
20077 }
20078 }
20079 emit_move_insn (part[0][1], part[1][1]);
20080 emit_move_insn (part[0][0], part[1][0]);
20081 return;
20082 }
20083
20084 /* Choose correct order to not overwrite the source before it is copied. */
20085 if ((REG_P (part[0][0])
20086 && REG_P (part[1][1])
20087 && (REGNO (part[0][0]) == REGNO (part[1][1])
20088 || (nparts == 3
20089 && REGNO (part[0][0]) == REGNO (part[1][2]))
20090 || (nparts == 4
20091 && REGNO (part[0][0]) == REGNO (part[1][3]))))
20092 || (collisions > 0
20093 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
20094 {
20095 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
20096 {
20097 operands[2 + i] = part[0][j];
20098 operands[6 + i] = part[1][j];
20099 }
20100 }
20101 else
20102 {
20103 for (i = 0; i < nparts; i++)
20104 {
20105 operands[2 + i] = part[0][i];
20106 operands[6 + i] = part[1][i];
20107 }
20108 }
20109
20110 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
20111 if (optimize_insn_for_size_p ())
20112 {
20113 for (j = 0; j < nparts - 1; j++)
20114 if (CONST_INT_P (operands[6 + j])
20115 && operands[6 + j] != const0_rtx
20116 && REG_P (operands[2 + j]))
20117 for (i = j; i < nparts - 1; i++)
20118 if (CONST_INT_P (operands[7 + i])
20119 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
20120 operands[7 + i] = operands[2 + j];
20121 }
20122
20123 for (i = 0; i < nparts; i++)
20124 emit_move_insn (operands[2 + i], operands[6 + i]);
20125
20126 return;
20127 }
20128
20129 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
20130 left shift by a constant, either using a single shift or
20131 a sequence of add instructions. */
20132
20133 static void
20134 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
20135 {
20136 rtx (*insn)(rtx, rtx, rtx);
20137
20138 if (count == 1
20139 || (count * ix86_cost->add <= ix86_cost->shift_const
20140 && !optimize_insn_for_size_p ()))
20141 {
20142 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
20143 while (count-- > 0)
20144 emit_insn (insn (operand, operand, operand));
20145 }
20146 else
20147 {
20148 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20149 emit_insn (insn (operand, operand, GEN_INT (count)));
20150 }
20151 }
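
/* Sketch of the cost decision above (register choice is illustrative):
   when the count is 1, or COUNT adds are no more expensive than one
   shift-by-constant on the current tuning and we are not optimizing for
   size, the half is doubled repeatedly, e.g. a left shift by 2 of the
   SImode half becomes

	addl	%eax, %eax
	addl	%eax, %eax

   otherwise a single  sall $2, %eax  is emitted.  */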
20152
20153 void
20154 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
20155 {
20156 rtx (*gen_ashl3)(rtx, rtx, rtx);
20157 rtx (*gen_shld)(rtx, rtx, rtx);
20158 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20159
20160 rtx low[2], high[2];
20161 int count;
20162
20163 if (CONST_INT_P (operands[2]))
20164 {
20165 split_double_mode (mode, operands, 2, low, high);
20166 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20167
20168 if (count >= half_width)
20169 {
20170 emit_move_insn (high[0], low[1]);
20171 emit_move_insn (low[0], const0_rtx);
20172
20173 if (count > half_width)
20174 ix86_expand_ashl_const (high[0], count - half_width, mode);
20175 }
20176 else
20177 {
20178 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20179
20180 if (!rtx_equal_p (operands[0], operands[1]))
20181 emit_move_insn (operands[0], operands[1]);
20182
20183 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
20184 ix86_expand_ashl_const (low[0], count, mode);
20185 }
20186 return;
20187 }
20188
20189 split_double_mode (mode, operands, 1, low, high);
20190
20191 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20192
20193 if (operands[1] == const1_rtx)
20194 {
20195 /* Assuming we've chosen QImode-capable registers, then 1 << N
20196 can be done with two 32/64-bit shifts, no branches, no cmoves. */
20197 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
20198 {
20199 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
20200
20201 ix86_expand_clear (low[0]);
20202 ix86_expand_clear (high[0]);
20203 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
20204
20205 d = gen_lowpart (QImode, low[0]);
20206 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20207 s = gen_rtx_EQ (QImode, flags, const0_rtx);
20208 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20209
20210 d = gen_lowpart (QImode, high[0]);
20211 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20212 s = gen_rtx_NE (QImode, flags, const0_rtx);
20213 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20214 }
20215
20216 /* Otherwise, we can get the same results by manually performing
20217 a bit extract operation on bit 5/6, and then performing the two
20218 shifts. The two methods of getting 0/1 into low/high are exactly
20219 the same size. Avoiding the shift in the bit extract case helps
20220 pentium4 a bit; no one else seems to care much either way. */
20221 else
20222 {
20223 enum machine_mode half_mode;
20224 rtx (*gen_lshr3)(rtx, rtx, rtx);
20225 rtx (*gen_and3)(rtx, rtx, rtx);
20226 rtx (*gen_xor3)(rtx, rtx, rtx);
20227 HOST_WIDE_INT bits;
20228 rtx x;
20229
20230 if (mode == DImode)
20231 {
20232 half_mode = SImode;
20233 gen_lshr3 = gen_lshrsi3;
20234 gen_and3 = gen_andsi3;
20235 gen_xor3 = gen_xorsi3;
20236 bits = 5;
20237 }
20238 else
20239 {
20240 half_mode = DImode;
20241 gen_lshr3 = gen_lshrdi3;
20242 gen_and3 = gen_anddi3;
20243 gen_xor3 = gen_xordi3;
20244 bits = 6;
20245 }
20246
20247 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
20248 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
20249 else
20250 x = gen_lowpart (half_mode, operands[2]);
20251 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
20252
20253 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
20254 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
20255 emit_move_insn (low[0], high[0]);
20256 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
20257 }
20258
20259 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20260 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
20261 return;
20262 }
20263
20264 if (operands[1] == constm1_rtx)
20265 {
20266 /* For -1 << N, we can avoid the shld instruction, because we
20267 know that we're shifting 0...31/63 ones into a -1. */
20268 emit_move_insn (low[0], constm1_rtx);
20269 if (optimize_insn_for_size_p ())
20270 emit_move_insn (high[0], low[0]);
20271 else
20272 emit_move_insn (high[0], constm1_rtx);
20273 }
20274 else
20275 {
20276 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20277
20278 if (!rtx_equal_p (operands[0], operands[1]))
20279 emit_move_insn (operands[0], operands[1]);
20280
20281 split_double_mode (mode, operands, 1, low, high);
20282 emit_insn (gen_shld (high[0], low[0], operands[2]));
20283 }
20284
20285 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20286
20287 if (TARGET_CMOVE && scratch)
20288 {
20289 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20290 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20291
20292 ix86_expand_clear (scratch);
20293 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
20294 }
20295 else
20296 {
20297 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20298 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20299
20300 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
20301 }
20302 }
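/* Illustrative sketch of the variable-count case above, assuming a
   32-bit target with a DImode value in %edx:%eax and the count in %cl
   (register assignments are illustrative; this is not the exact RTL):

       shldl %cl, %eax, %edx   ; high = high:low << (cl & 31)
       sall  %cl, %eax         ; low <<= (cl & 31)
       testb $32, %cl          ; was the count >= 32?
       cmovne %eax, %edx       ; if so, high = low ...
       cmovne %ebx, %eax       ; ... and low = 0 (scratch %ebx pre-cleared)

   Without cmov, a conditional branch performs the same adjustment.        */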
20303
20304 void
20305 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
20306 {
20307 rtx (*gen_ashr3)(rtx, rtx, rtx)
20308 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
20309 rtx (*gen_shrd)(rtx, rtx, rtx);
20310 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20311
20312 rtx low[2], high[2];
20313 int count;
20314
20315 if (CONST_INT_P (operands[2]))
20316 {
20317 split_double_mode (mode, operands, 2, low, high);
20318 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20319
20320 if (count == GET_MODE_BITSIZE (mode) - 1)
20321 {
20322 emit_move_insn (high[0], high[1]);
20323 emit_insn (gen_ashr3 (high[0], high[0],
20324 GEN_INT (half_width - 1)));
20325 emit_move_insn (low[0], high[0]);
20326
20327 }
20328 else if (count >= half_width)
20329 {
20330 emit_move_insn (low[0], high[1]);
20331 emit_move_insn (high[0], low[0]);
20332 emit_insn (gen_ashr3 (high[0], high[0],
20333 GEN_INT (half_width - 1)));
20334
20335 if (count > half_width)
20336 emit_insn (gen_ashr3 (low[0], low[0],
20337 GEN_INT (count - half_width)));
20338 }
20339 else
20340 {
20341 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20342
20343 if (!rtx_equal_p (operands[0], operands[1]))
20344 emit_move_insn (operands[0], operands[1]);
20345
20346 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20347 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
20348 }
20349 }
20350 else
20351 {
20352 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20353
20354 if (!rtx_equal_p (operands[0], operands[1]))
20355 emit_move_insn (operands[0], operands[1]);
20356
20357 split_double_mode (mode, operands, 1, low, high);
20358
20359 emit_insn (gen_shrd (low[0], high[0], operands[2]));
20360 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
20361
20362 if (TARGET_CMOVE && scratch)
20363 {
20364 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20365 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20366
20367 emit_move_insn (scratch, high[0]);
20368 emit_insn (gen_ashr3 (scratch, scratch,
20369 GEN_INT (half_width - 1)));
20370 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
20371 scratch));
20372 }
20373 else
20374 {
20375 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
20376 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
20377
20378 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
20379 }
20380 }
20381 }
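/* Illustrative sketch of the constant-count branch above for a count of
   40 on a 32-bit target (DImode value in %edx:%eax; a sketch of the
   effect, not the exact emitted code):

       movl %edx, %eax         ; low  = old high
       movl %eax, %edx
       sarl $31, %edx          ; high = sign-fill from old high
       sarl $8, %eax           ; low >>= (40 - 32)                          */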
20382
20383 void
20384 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
20385 {
20386 rtx (*gen_lshr3)(rtx, rtx, rtx)
20387 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
20388 rtx (*gen_shrd)(rtx, rtx, rtx);
20389 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20390
20391 rtx low[2], high[2];
20392 int count;
20393
20394 if (CONST_INT_P (operands[2]))
20395 {
20396 split_double_mode (mode, operands, 2, low, high);
20397 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20398
20399 if (count >= half_width)
20400 {
20401 emit_move_insn (low[0], high[1]);
20402 ix86_expand_clear (high[0]);
20403
20404 if (count > half_width)
20405 emit_insn (gen_lshr3 (low[0], low[0],
20406 GEN_INT (count - half_width)));
20407 }
20408 else
20409 {
20410 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20411
20412 if (!rtx_equal_p (operands[0], operands[1]))
20413 emit_move_insn (operands[0], operands[1]);
20414
20415 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20416 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
20417 }
20418 }
20419 else
20420 {
20421 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20422
20423 if (!rtx_equal_p (operands[0], operands[1]))
20424 emit_move_insn (operands[0], operands[1]);
20425
20426 split_double_mode (mode, operands, 1, low, high);
20427
20428 emit_insn (gen_shrd (low[0], high[0], operands[2]));
20429 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
20430
20431 if (TARGET_CMOVE && scratch)
20432 {
20433 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20434 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20435
20436 ix86_expand_clear (scratch);
20437 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
20438 scratch));
20439 }
20440 else
20441 {
20442 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20443 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20444
20445 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
20446 }
20447 }
20448 }
20449
20450 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
20451 static void
20452 predict_jump (int prob)
20453 {
20454 rtx insn = get_last_insn ();
20455 gcc_assert (JUMP_P (insn));
20456 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
20457 }
20458
20459 /* Helper function for the string operations below. Test whether VARIABLE
20460 is aligned to VALUE bytes. If so, jump to the returned label. */
20461 static rtx
20462 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
20463 {
20464 rtx label = gen_label_rtx ();
20465 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
20466 if (GET_MODE (variable) == DImode)
20467 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
20468 else
20469 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
20470 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
20471 1, label);
20472 if (epilogue)
20473 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20474 else
20475 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20476 return label;
20477 }
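/* Illustrative sketch of what ix86_expand_aligntest emits for
   ix86_expand_aligntest (ptr, 4, false) (a sketch of the effect, not the
   exact RTL; the temporary register is illustrative):

       movl  ptr, %tmp
       andl  $4, %tmp
       je    .Llabel           ; bit clear -> skip this chunk

   The returned label (.Llabel here) is emitted later by the caller.       */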
20478
20479 /* Decrease COUNTREG by VALUE. */
20480 static void
20481 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
20482 {
20483 rtx (*gen_add)(rtx, rtx, rtx)
20484 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
20485
20486 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
20487 }
20488
20489 /* Zero-extend the (possibly SImode) EXP into a Pmode register. */
20490 rtx
20491 ix86_zero_extend_to_Pmode (rtx exp)
20492 {
20493 rtx r;
20494 if (GET_MODE (exp) == VOIDmode)
20495 return force_reg (Pmode, exp);
20496 if (GET_MODE (exp) == Pmode)
20497 return copy_to_mode_reg (Pmode, exp);
20498 r = gen_reg_rtx (Pmode);
20499 emit_insn (gen_zero_extendsidi2 (r, exp));
20500 return r;
20501 }
20502
20503 /* Divide COUNTREG by SCALE. */
20504 static rtx
20505 scale_counter (rtx countreg, int scale)
20506 {
20507 rtx sc;
20508
20509 if (scale == 1)
20510 return countreg;
20511 if (CONST_INT_P (countreg))
20512 return GEN_INT (INTVAL (countreg) / scale);
20513 gcc_assert (REG_P (countreg));
20514
20515 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
20516 GEN_INT (exact_log2 (scale)),
20517 NULL, 1, OPTAB_DIRECT);
20518 return sc;
20519 }
20520
20521 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
20522 DImode for constant loop counts. */
20523
20524 static enum machine_mode
20525 counter_mode (rtx count_exp)
20526 {
20527 if (GET_MODE (count_exp) != VOIDmode)
20528 return GET_MODE (count_exp);
20529 if (!CONST_INT_P (count_exp))
20530 return Pmode;
20531 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
20532 return DImode;
20533 return SImode;
20534 }
20535
20536 /* When SRCPTR is non-NULL, output a simple loop that moves memory
20537 from SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
20538 the overall size is COUNT, specified in bytes. When SRCPTR is NULL,
20539 output the equivalent loop that sets memory to VALUE (assumed to be in MODE).
20540
20541 The size is rounded down to a whole number of chunks moved at once.
20542 SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info. */
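/* Illustrative shape of the emitted loop, assuming MODE == SImode,
   UNROLL == 1 and the copy (non-set) variant (a sketch only, not the
   exact RTL):

       size = count & ~(4 - 1)
       iter = 0
   top:
       *(int *)(dest + iter) = *(int *)(src + iter)
       iter += 4
       if (iter < size) goto top
       dest += iter;  src += iter                                          */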
20543
20544
20545 static void
20546 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
20547 rtx destptr, rtx srcptr, rtx value,
20548 rtx count, enum machine_mode mode, int unroll,
20549 int expected_size)
20550 {
20551 rtx out_label, top_label, iter, tmp;
20552 enum machine_mode iter_mode = counter_mode (count);
20553 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
20554 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
20555 rtx size;
20556 rtx x_addr;
20557 rtx y_addr;
20558 int i;
20559
20560 top_label = gen_label_rtx ();
20561 out_label = gen_label_rtx ();
20562 iter = gen_reg_rtx (iter_mode);
20563
20564 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
20565 NULL, 1, OPTAB_DIRECT);
20566 /* Those two should combine. */
20567 if (piece_size == const1_rtx)
20568 {
20569 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
20570 true, out_label);
20571 predict_jump (REG_BR_PROB_BASE * 10 / 100);
20572 }
20573 emit_move_insn (iter, const0_rtx);
20574
20575 emit_label (top_label);
20576
20577 tmp = convert_modes (Pmode, iter_mode, iter, true);
20578 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
20579 destmem = change_address (destmem, mode, x_addr);
20580
20581 if (srcmem)
20582 {
20583 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
20584 srcmem = change_address (srcmem, mode, y_addr);
20585
20586 /* When unrolling for chips that reorder memory reads and writes,
20587 we can save registers by using a single temporary.
20588 Using 4 temporaries is also overkill in 32bit mode. */
20589 if (!TARGET_64BIT && 0)
20590 {
20591 for (i = 0; i < unroll; i++)
20592 {
20593 if (i)
20594 {
20595 destmem =
20596 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20597 srcmem =
20598 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20599 }
20600 emit_move_insn (destmem, srcmem);
20601 }
20602 }
20603 else
20604 {
20605 rtx tmpreg[4];
20606 gcc_assert (unroll <= 4);
20607 for (i = 0; i < unroll; i++)
20608 {
20609 tmpreg[i] = gen_reg_rtx (mode);
20610 if (i)
20611 {
20612 srcmem =
20613 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20614 }
20615 emit_move_insn (tmpreg[i], srcmem);
20616 }
20617 for (i = 0; i < unroll; i++)
20618 {
20619 if (i)
20620 {
20621 destmem =
20622 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20623 }
20624 emit_move_insn (destmem, tmpreg[i]);
20625 }
20626 }
20627 }
20628 else
20629 for (i = 0; i < unroll; i++)
20630 {
20631 if (i)
20632 destmem =
20633 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20634 emit_move_insn (destmem, value);
20635 }
20636
20637 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
20638 true, OPTAB_LIB_WIDEN);
20639 if (tmp != iter)
20640 emit_move_insn (iter, tmp);
20641
20642 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
20643 true, top_label);
20644 if (expected_size != -1)
20645 {
20646 expected_size /= GET_MODE_SIZE (mode) * unroll;
20647 if (expected_size == 0)
20648 predict_jump (0);
20649 else if (expected_size > REG_BR_PROB_BASE)
20650 predict_jump (REG_BR_PROB_BASE - 1);
20651 else
20652 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
20653 }
20654 else
20655 predict_jump (REG_BR_PROB_BASE * 80 / 100);
20656 iter = ix86_zero_extend_to_Pmode (iter);
20657 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
20658 true, OPTAB_LIB_WIDEN);
20659 if (tmp != destptr)
20660 emit_move_insn (destptr, tmp);
20661 if (srcptr)
20662 {
20663 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
20664 true, OPTAB_LIB_WIDEN);
20665 if (tmp != srcptr)
20666 emit_move_insn (srcptr, tmp);
20667 }
20668 emit_label (out_label);
20669 }
20670
20671 /* Output "rep; mov" instruction.
20672 Arguments have same meaning as for previous function */
20673 static void
20674 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
20675 rtx destptr, rtx srcptr,
20676 rtx count,
20677 enum machine_mode mode)
20678 {
20679 rtx destexp;
20680 rtx srcexp;
20681 rtx countreg;
20682 HOST_WIDE_INT rounded_count;
20683
20684 /* If the size is known and is a multiple of 4, it is shorter to use 4-byte rep movs. */
20685 if (mode == QImode && CONST_INT_P (count)
20686 && !(INTVAL (count) & 3))
20687 mode = SImode;
20688
20689 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20690 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20691 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
20692 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
20693 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20694 if (mode != QImode)
20695 {
20696 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20697 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20698 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20699 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
20700 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20701 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
20702 }
20703 else
20704 {
20705 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20706 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
20707 }
20708 if (CONST_INT_P (count))
20709 {
20710 rounded_count = (INTVAL (count)
20711 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20712 destmem = shallow_copy_rtx (destmem);
20713 srcmem = shallow_copy_rtx (srcmem);
20714 set_mem_size (destmem, rounded_count);
20715 set_mem_size (srcmem, rounded_count);
20716 }
20717 else
20718 {
20719 if (MEM_SIZE_KNOWN_P (destmem))
20720 clear_mem_size (destmem);
20721 if (MEM_SIZE_KNOWN_P (srcmem))
20722 clear_mem_size (srcmem);
20723 }
20724 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
20725 destexp, srcexp));
20726 }
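/* Illustrative sketch of the "rep mov" expansion for SImode chunks on a
   32-bit target (a sketch of the emitted effect, not the exact insns):

       movl  count, %ecx
       shrl  $2, %ecx          ; scale_counter: byte count -> dword count
       rep movsl               ; %esi/%edi advance by 4*%ecx

   DESTEXP/SRCEXP above describe the final pointer values
   (ptr + (countreg << 2)), so the rtx matches this side effect.           */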
20727
20728 /* Output "rep; stos" instruction.
20729 Arguments have same meaning as for previous function */
20730 static void
20731 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
20732 rtx count, enum machine_mode mode,
20733 rtx orig_value)
20734 {
20735 rtx destexp;
20736 rtx countreg;
20737 HOST_WIDE_INT rounded_count;
20738
20739 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20740 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20741 value = force_reg (mode, gen_lowpart (mode, value));
20742 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20743 if (mode != QImode)
20744 {
20745 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20746 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20747 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20748 }
20749 else
20750 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20751 if (orig_value == const0_rtx && CONST_INT_P (count))
20752 {
20753 rounded_count = (INTVAL (count)
20754 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20755 destmem = shallow_copy_rtx (destmem);
20756 set_mem_size (destmem, rounded_count);
20757 }
20758 else if (MEM_SIZE_KNOWN_P (destmem))
20759 clear_mem_size (destmem);
20760 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
20761 }
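/* Illustrative sketch of the "rep stos" expansion for SImode chunks
   (a sketch of the emitted effect, not the exact insns):

       movl  value, %eax       ; VALUE already widened to a full SImode pattern
       movl  count, %ecx
       shrl  $2, %ecx
       rep stosl               ; %edi advances by 4*%ecx                    */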
20762
20763 static void
20764 emit_strmov (rtx destmem, rtx srcmem,
20765 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
20766 {
20767 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
20768 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
20769 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20770 }
20771
20772 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
20773 static void
20774 expand_movmem_epilogue (rtx destmem, rtx srcmem,
20775 rtx destptr, rtx srcptr, rtx count, int max_size)
20776 {
20777 rtx src, dest;
20778 if (CONST_INT_P (count))
20779 {
20780 HOST_WIDE_INT countval = INTVAL (count);
20781 int offset = 0;
20782
20783 if ((countval & 0x10) && max_size > 16)
20784 {
20785 if (TARGET_64BIT)
20786 {
20787 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20788 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20789 }
20790 else
20791 gcc_unreachable ();
20792 offset += 16;
20793 }
20794 if ((countval & 0x08) && max_size > 8)
20795 {
20796 if (TARGET_64BIT)
20797 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20798 else
20799 {
20800 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20801 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20802 }
20803 offset += 8;
20804 }
20805 if ((countval & 0x04) && max_size > 4)
20806 {
20807 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20808 offset += 4;
20809 }
20810 if ((countval & 0x02) && max_size > 2)
20811 {
20812 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20813 offset += 2;
20814 }
20815 if ((countval & 0x01) && max_size > 1)
20816 {
20817 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
20818 offset += 1;
20819 }
20820 return;
20821 }
20822 if (max_size > 8)
20823 {
20824 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20825 count, 1, OPTAB_DIRECT);
20826 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20827 count, QImode, 1, 4);
20828 return;
20829 }
20830
20831 /* When single-instruction stringops are available, we can cheaply advance
20832 the dest and src pointers. Otherwise we save code size by maintaining an
20833 offset (zero is readily available from the preceding rep operation) and
20834 using x86 addressing modes. */
20835 if (TARGET_SINGLE_STRINGOP)
20836 {
20837 if (max_size > 4)
20838 {
20839 rtx label = ix86_expand_aligntest (count, 4, true);
20840 src = change_address (srcmem, SImode, srcptr);
20841 dest = change_address (destmem, SImode, destptr);
20842 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20843 emit_label (label);
20844 LABEL_NUSES (label) = 1;
20845 }
20846 if (max_size > 2)
20847 {
20848 rtx label = ix86_expand_aligntest (count, 2, true);
20849 src = change_address (srcmem, HImode, srcptr);
20850 dest = change_address (destmem, HImode, destptr);
20851 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20852 emit_label (label);
20853 LABEL_NUSES (label) = 1;
20854 }
20855 if (max_size > 1)
20856 {
20857 rtx label = ix86_expand_aligntest (count, 1, true);
20858 src = change_address (srcmem, QImode, srcptr);
20859 dest = change_address (destmem, QImode, destptr);
20860 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20861 emit_label (label);
20862 LABEL_NUSES (label) = 1;
20863 }
20864 }
20865 else
20866 {
20867 rtx offset = force_reg (Pmode, const0_rtx);
20868 rtx tmp;
20869
20870 if (max_size > 4)
20871 {
20872 rtx label = ix86_expand_aligntest (count, 4, true);
20873 src = change_address (srcmem, SImode, srcptr);
20874 dest = change_address (destmem, SImode, destptr);
20875 emit_move_insn (dest, src);
20876 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20877 true, OPTAB_LIB_WIDEN);
20878 if (tmp != offset)
20879 emit_move_insn (offset, tmp);
20880 emit_label (label);
20881 LABEL_NUSES (label) = 1;
20882 }
20883 if (max_size > 2)
20884 {
20885 rtx label = ix86_expand_aligntest (count, 2, true);
20886 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20887 src = change_address (srcmem, HImode, tmp);
20888 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20889 dest = change_address (destmem, HImode, tmp);
20890 emit_move_insn (dest, src);
20891 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20892 true, OPTAB_LIB_WIDEN);
20893 if (tmp != offset)
20894 emit_move_insn (offset, tmp);
20895 emit_label (label);
20896 LABEL_NUSES (label) = 1;
20897 }
20898 if (max_size > 1)
20899 {
20900 rtx label = ix86_expand_aligntest (count, 1, true);
20901 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20902 src = change_address (srcmem, QImode, tmp);
20903 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20904 dest = change_address (destmem, QImode, tmp);
20905 emit_move_insn (dest, src);
20906 emit_label (label);
20907 LABEL_NUSES (label) = 1;
20908 }
20909 }
20910 }
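/* Illustrative example of the constant-count branch above: with
   countval == 7 and max_size == 8 on a 32-bit target, the epilogue copies

       4 bytes (bit 2 of the count),
       then 2 bytes (bit 1),
       then 1 byte (bit 0),

   e.g. as movsl/movsw/movsb when single-instruction stringops are used
   (this describes the effect, not the literal output).                    */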
20911
20912 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20913 static void
20914 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20915 rtx count, int max_size)
20916 {
20917 count =
20918 expand_simple_binop (counter_mode (count), AND, count,
20919 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20920 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20921 gen_lowpart (QImode, value), count, QImode,
20922 1, max_size / 2);
20923 }
20924
20925 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20926 static void
20927 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20928 {
20929 rtx dest;
20930
20931 if (CONST_INT_P (count))
20932 {
20933 HOST_WIDE_INT countval = INTVAL (count);
20934 int offset = 0;
20935
20936 if ((countval & 0x10) && max_size > 16)
20937 {
20938 if (TARGET_64BIT)
20939 {
20940 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20941 emit_insn (gen_strset (destptr, dest, value));
20942 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20943 emit_insn (gen_strset (destptr, dest, value));
20944 }
20945 else
20946 gcc_unreachable ();
20947 offset += 16;
20948 }
20949 if ((countval & 0x08) && max_size > 8)
20950 {
20951 if (TARGET_64BIT)
20952 {
20953 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20954 emit_insn (gen_strset (destptr, dest, value));
20955 }
20956 else
20957 {
20958 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20959 emit_insn (gen_strset (destptr, dest, value));
20960 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20961 emit_insn (gen_strset (destptr, dest, value));
20962 }
20963 offset += 8;
20964 }
20965 if ((countval & 0x04) && max_size > 4)
20966 {
20967 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20968 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20969 offset += 4;
20970 }
20971 if ((countval & 0x02) && max_size > 2)
20972 {
20973 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20974 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20975 offset += 2;
20976 }
20977 if ((countval & 0x01) && max_size > 1)
20978 {
20979 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20980 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20981 offset += 1;
20982 }
20983 return;
20984 }
20985 if (max_size > 32)
20986 {
20987 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20988 return;
20989 }
20990 if (max_size > 16)
20991 {
20992 rtx label = ix86_expand_aligntest (count, 16, true);
20993 if (TARGET_64BIT)
20994 {
20995 dest = change_address (destmem, DImode, destptr);
20996 emit_insn (gen_strset (destptr, dest, value));
20997 emit_insn (gen_strset (destptr, dest, value));
20998 }
20999 else
21000 {
21001 dest = change_address (destmem, SImode, destptr);
21002 emit_insn (gen_strset (destptr, dest, value));
21003 emit_insn (gen_strset (destptr, dest, value));
21004 emit_insn (gen_strset (destptr, dest, value));
21005 emit_insn (gen_strset (destptr, dest, value));
21006 }
21007 emit_label (label);
21008 LABEL_NUSES (label) = 1;
21009 }
21010 if (max_size > 8)
21011 {
21012 rtx label = ix86_expand_aligntest (count, 8, true);
21013 if (TARGET_64BIT)
21014 {
21015 dest = change_address (destmem, DImode, destptr);
21016 emit_insn (gen_strset (destptr, dest, value));
21017 }
21018 else
21019 {
21020 dest = change_address (destmem, SImode, destptr);
21021 emit_insn (gen_strset (destptr, dest, value));
21022 emit_insn (gen_strset (destptr, dest, value));
21023 }
21024 emit_label (label);
21025 LABEL_NUSES (label) = 1;
21026 }
21027 if (max_size > 4)
21028 {
21029 rtx label = ix86_expand_aligntest (count, 4, true);
21030 dest = change_address (destmem, SImode, destptr);
21031 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21032 emit_label (label);
21033 LABEL_NUSES (label) = 1;
21034 }
21035 if (max_size > 2)
21036 {
21037 rtx label = ix86_expand_aligntest (count, 2, true);
21038 dest = change_address (destmem, HImode, destptr);
21039 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21040 emit_label (label);
21041 LABEL_NUSES (label) = 1;
21042 }
21043 if (max_size > 1)
21044 {
21045 rtx label = ix86_expand_aligntest (count, 1, true);
21046 dest = change_address (destmem, QImode, destptr);
21047 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21048 emit_label (label);
21049 LABEL_NUSES (label) = 1;
21050 }
21051 }
21052
21053 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
21054 to DESIRED_ALIGNMENT. */
21055 static void
21056 expand_movmem_prologue (rtx destmem, rtx srcmem,
21057 rtx destptr, rtx srcptr, rtx count,
21058 int align, int desired_alignment)
21059 {
21060 if (align <= 1 && desired_alignment > 1)
21061 {
21062 rtx label = ix86_expand_aligntest (destptr, 1, false);
21063 srcmem = change_address (srcmem, QImode, srcptr);
21064 destmem = change_address (destmem, QImode, destptr);
21065 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21066 ix86_adjust_counter (count, 1);
21067 emit_label (label);
21068 LABEL_NUSES (label) = 1;
21069 }
21070 if (align <= 2 && desired_alignment > 2)
21071 {
21072 rtx label = ix86_expand_aligntest (destptr, 2, false);
21073 srcmem = change_address (srcmem, HImode, srcptr);
21074 destmem = change_address (destmem, HImode, destptr);
21075 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21076 ix86_adjust_counter (count, 2);
21077 emit_label (label);
21078 LABEL_NUSES (label) = 1;
21079 }
21080 if (align <= 4 && desired_alignment > 4)
21081 {
21082 rtx label = ix86_expand_aligntest (destptr, 4, false);
21083 srcmem = change_address (srcmem, SImode, srcptr);
21084 destmem = change_address (destmem, SImode, destptr);
21085 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21086 ix86_adjust_counter (count, 4);
21087 emit_label (label);
21088 LABEL_NUSES (label) = 1;
21089 }
21090 gcc_assert (desired_alignment <= 8);
21091 }
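/* Illustrative example: with ALIGN == 1 and DESIRED_ALIGNMENT == 4 the
   prologue above emits, in effect (a sketch only):

       if (dest & 1) { copy 1 byte;  count -= 1; }
       if (dest & 2) { copy 2 bytes; count -= 2; }

   leaving DEST 4-byte aligned before the main loop.                       */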
21092
21093 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
21094 ALIGN_BYTES is how many bytes need to be copied. */
21095 static rtx
21096 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
21097 int desired_align, int align_bytes)
21098 {
21099 rtx src = *srcp;
21100 rtx orig_dst = dst;
21101 rtx orig_src = src;
21102 int off = 0;
21103 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
21104 if (src_align_bytes >= 0)
21105 src_align_bytes = desired_align - src_align_bytes;
21106 if (align_bytes & 1)
21107 {
21108 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21109 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
21110 off = 1;
21111 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21112 }
21113 if (align_bytes & 2)
21114 {
21115 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21116 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
21117 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21118 set_mem_align (dst, 2 * BITS_PER_UNIT);
21119 if (src_align_bytes >= 0
21120 && (src_align_bytes & 1) == (align_bytes & 1)
21121 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
21122 set_mem_align (src, 2 * BITS_PER_UNIT);
21123 off = 2;
21124 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21125 }
21126 if (align_bytes & 4)
21127 {
21128 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21129 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
21130 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21131 set_mem_align (dst, 4 * BITS_PER_UNIT);
21132 if (src_align_bytes >= 0)
21133 {
21134 unsigned int src_align = 0;
21135 if ((src_align_bytes & 3) == (align_bytes & 3))
21136 src_align = 4;
21137 else if ((src_align_bytes & 1) == (align_bytes & 1))
21138 src_align = 2;
21139 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21140 set_mem_align (src, src_align * BITS_PER_UNIT);
21141 }
21142 off = 4;
21143 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21144 }
21145 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21146 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
21147 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21148 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21149 if (src_align_bytes >= 0)
21150 {
21151 unsigned int src_align = 0;
21152 if ((src_align_bytes & 7) == (align_bytes & 7))
21153 src_align = 8;
21154 else if ((src_align_bytes & 3) == (align_bytes & 3))
21155 src_align = 4;
21156 else if ((src_align_bytes & 1) == (align_bytes & 1))
21157 src_align = 2;
21158 if (src_align > (unsigned int) desired_align)
21159 src_align = desired_align;
21160 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21161 set_mem_align (src, src_align * BITS_PER_UNIT);
21162 }
21163 if (MEM_SIZE_KNOWN_P (orig_dst))
21164 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21165 if (MEM_SIZE_KNOWN_P (orig_src))
21166 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
21167 *srcp = src;
21168 return dst;
21169 }
21170
21171 /* Store enough into DEST to align DEST, known to be aligned by ALIGN,
21172 to DESIRED_ALIGNMENT. */
21173 static void
21174 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
21175 int align, int desired_alignment)
21176 {
21177 if (align <= 1 && desired_alignment > 1)
21178 {
21179 rtx label = ix86_expand_aligntest (destptr, 1, false);
21180 destmem = change_address (destmem, QImode, destptr);
21181 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
21182 ix86_adjust_counter (count, 1);
21183 emit_label (label);
21184 LABEL_NUSES (label) = 1;
21185 }
21186 if (align <= 2 && desired_alignment > 2)
21187 {
21188 rtx label = ix86_expand_aligntest (destptr, 2, false);
21189 destmem = change_address (destmem, HImode, destptr);
21190 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
21191 ix86_adjust_counter (count, 2);
21192 emit_label (label);
21193 LABEL_NUSES (label) = 1;
21194 }
21195 if (align <= 4 && desired_alignment > 4)
21196 {
21197 rtx label = ix86_expand_aligntest (destptr, 4, false);
21198 destmem = change_address (destmem, SImode, destptr);
21199 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
21200 ix86_adjust_counter (count, 4);
21201 emit_label (label);
21202 LABEL_NUSES (label) = 1;
21203 }
21204 gcc_assert (desired_alignment <= 8);
21205 }
21206
21207 /* Store enough into DST to align DST, known to be aligned by ALIGN, to
21208 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
21209 static rtx
21210 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
21211 int desired_align, int align_bytes)
21212 {
21213 int off = 0;
21214 rtx orig_dst = dst;
21215 if (align_bytes & 1)
21216 {
21217 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21218 off = 1;
21219 emit_insn (gen_strset (destreg, dst,
21220 gen_lowpart (QImode, value)));
21221 }
21222 if (align_bytes & 2)
21223 {
21224 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21225 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21226 set_mem_align (dst, 2 * BITS_PER_UNIT);
21227 off = 2;
21228 emit_insn (gen_strset (destreg, dst,
21229 gen_lowpart (HImode, value)));
21230 }
21231 if (align_bytes & 4)
21232 {
21233 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21234 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21235 set_mem_align (dst, 4 * BITS_PER_UNIT);
21236 off = 4;
21237 emit_insn (gen_strset (destreg, dst,
21238 gen_lowpart (SImode, value)));
21239 }
21240 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21241 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21242 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21243 if (MEM_SIZE_KNOWN_P (orig_dst))
21244 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21245 return dst;
21246 }
21247
21248 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
21249 static enum stringop_alg
21250 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
21251 int *dynamic_check)
21252 {
21253 const struct stringop_algs * algs;
21254 bool optimize_for_speed;
21255 /* Algorithms using the rep prefix want at least edi and ecx;
21256 additionally, memset wants eax and memcpy wants esi. Don't
21257 consider such algorithms if the user has appropriated those
21258 registers for their own purposes. */
21259 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
21260 || (memset
21261 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
21262
21263 #define ALG_USABLE_P(alg) (rep_prefix_usable \
21264 || (alg != rep_prefix_1_byte \
21265 && alg != rep_prefix_4_byte \
21266 && alg != rep_prefix_8_byte))
21267 const struct processor_costs *cost;
21268
21269 /* Even if the string operation call is cold, we still might spend a lot
21270 of time processing large blocks. */
21271 if (optimize_function_for_size_p (cfun)
21272 || (optimize_insn_for_size_p ()
21273 && expected_size != -1 && expected_size < 256))
21274 optimize_for_speed = false;
21275 else
21276 optimize_for_speed = true;
21277
21278 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
21279
21280 *dynamic_check = -1;
21281 if (memset)
21282 algs = &cost->memset[TARGET_64BIT != 0];
21283 else
21284 algs = &cost->memcpy[TARGET_64BIT != 0];
21285 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
21286 return ix86_stringop_alg;
21287 /* rep; movq or rep; movl is the smallest variant. */
21288 else if (!optimize_for_speed)
21289 {
21290 if (!count || (count & 3))
21291 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
21292 else
21293 return rep_prefix_usable ? rep_prefix_4_byte : loop;
21294 }
21295 /* Very tiny blocks are best handled via the loop; REP is expensive to set
21296 up. */
21297 else if (expected_size != -1 && expected_size < 4)
21298 return loop_1_byte;
21299 else if (expected_size != -1)
21300 {
21301 unsigned int i;
21302 enum stringop_alg alg = libcall;
21303 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
21304 {
21305 /* We get here if the algorithms that were not libcall-based
21306 were rep-prefix based and we are unable to use rep prefixes
21307 based on global register usage. Break out of the loop and
21308 use the heuristic below. */
21309 if (algs->size[i].max == 0)
21310 break;
21311 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
21312 {
21313 enum stringop_alg candidate = algs->size[i].alg;
21314
21315 if (candidate != libcall && ALG_USABLE_P (candidate))
21316 alg = candidate;
21317 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
21318 last non-libcall inline algorithm. */
21319 if (TARGET_INLINE_ALL_STRINGOPS)
21320 {
21321 /* When the current size is best to be copied by a libcall,
21322 but we are still forced to inline, run the heuristic below
21323 that will pick code for medium sized blocks. */
21324 if (alg != libcall)
21325 return alg;
21326 break;
21327 }
21328 else if (ALG_USABLE_P (candidate))
21329 return candidate;
21330 }
21331 }
21332 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
21333 }
21334 /* When asked to inline the call anyway, try to pick a meaningful choice.
21335 We look for the maximal size of block that is faster to copy by hand and
21336 take blocks of at most that size, guessing that the average size will
21337 be roughly half of the block.
21338
21339 If this turns out to be bad, we might simply specify the preferred
21340 choice in ix86_costs. */
21341 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21342 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
21343 {
21344 int max = -1;
21345 enum stringop_alg alg;
21346 int i;
21347 bool any_alg_usable_p = true;
21348
21349 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
21350 {
21351 enum stringop_alg candidate = algs->size[i].alg;
21352 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
21353
21354 if (candidate != libcall && candidate
21355 && ALG_USABLE_P (candidate))
21356 max = algs->size[i].max;
21357 }
21358 /* If there aren't any usable algorithms, then recursing on
21359 smaller sizes isn't going to find anything. Just return the
21360 simple byte-at-a-time copy loop. */
21361 if (!any_alg_usable_p)
21362 {
21363 /* Pick something reasonable. */
21364 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21365 *dynamic_check = 128;
21366 return loop_1_byte;
21367 }
21368 if (max == -1)
21369 max = 4096;
21370 alg = decide_alg (count, max / 2, memset, dynamic_check);
21371 gcc_assert (*dynamic_check == -1);
21372 gcc_assert (alg != libcall);
21373 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21374 *dynamic_check = max;
21375 return alg;
21376 }
21377 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
21378 #undef ALG_USABLE_P
21379 }
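/* Worked example (hypothetical cost table, for illustration only): if the
   size table for the active tuning were

       {256, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}

   then a copy with expected_size == 1000 would pick rep_prefix_4_byte,
   while an unknown size falls through to algs->unknown_size (or to the
   recursive "half of the maximal inline size" heuristic when inlining is
   forced).                                                                */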
21380
21381 /* Decide on alignment. We know that the operand is already aligned to ALIGN
21382 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
21383 static int
21384 decide_alignment (int align,
21385 enum stringop_alg alg,
21386 int expected_size)
21387 {
21388 int desired_align = 0;
21389 switch (alg)
21390 {
21391 case no_stringop:
21392 gcc_unreachable ();
21393 case loop:
21394 case unrolled_loop:
21395 desired_align = GET_MODE_SIZE (Pmode);
21396 break;
21397 case rep_prefix_8_byte:
21398 desired_align = 8;
21399 break;
21400 case rep_prefix_4_byte:
21401 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
21402 copying a whole cacheline at once. */
21403 if (TARGET_PENTIUMPRO)
21404 desired_align = 8;
21405 else
21406 desired_align = 4;
21407 break;
21408 case rep_prefix_1_byte:
21409 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
21410 copying a whole cacheline at once. */
21411 if (TARGET_PENTIUMPRO)
21412 desired_align = 8;
21413 else
21414 desired_align = 1;
21415 break;
21416 case loop_1_byte:
21417 desired_align = 1;
21418 break;
21419 case libcall:
21420 return 0;
21421 }
21422
21423 if (optimize_size)
21424 desired_align = 1;
21425 if (desired_align < align)
21426 desired_align = align;
21427 if (expected_size != -1 && expected_size < 4)
21428 desired_align = align;
21429 return desired_align;
21430 }
21431
21432 /* Return the smallest power of 2 greater than VAL. */
21433 static int
21434 smallest_pow2_greater_than (int val)
21435 {
21436 int ret = 1;
21437 while (ret <= val)
21438 ret <<= 1;
21439 return ret;
21440 }
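/* For illustration: smallest_pow2_greater_than (4) == 8 and
   smallest_pow2_greater_than (0) == 1; note the result is strictly
   greater than VAL.                                                       */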
21441
21442 /* Expand string move (memcpy) operation. Use i386 string operations
21443 when profitable. ix86_expand_setmem contains similar code. The code
21444 depends upon architecture, block size and alignment, but always has
21445 the same overall structure:
21446
21447 1) Prologue guard: Conditional that jumps up to the epilogue for small
21448 blocks that can be handled by the epilogue alone. This is faster
21449 but also needed for correctness, since the prologue assumes the block
21450 is larger than the desired alignment.
21451
21452 An optional dynamic check for size and a libcall for large
21453 blocks are emitted here too, with -minline-stringops-dynamically.
21454
21455 2) Prologue: copy the first few bytes in order to get the destination
21456 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
21457 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
21458 copied. We emit either a jump tree on power of two sized
21459 blocks, or a byte loop.
21460
21461 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
21462 with the specified algorithm.
21463
21464 4) Epilogue: code copying the tail of the block that is too small to be
21465 handled by the main body (or up to the size guarded by the prologue guard). */
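
/* Illustrative control-flow sketch of the code emitted by
   ix86_expand_movmem for an unknown count (a sketch only; the exact shape
   depends on the chosen algorithm and alignment):

       if (count < epilogue_size_needed) goto epilogue;    // 1) guard
       while (dest & (desired_align - 1))                   // 2) prologue
         copy 1/2/4 bytes, count -= 1/2/4;
       main copy loop or rep prefix over count chunks;      // 3) body
   epilogue:
       copy count & (epilogue_size_needed - 1) tail bytes;  // 4) epilogue */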
21466
21467 bool
21468 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
21469 rtx expected_align_exp, rtx expected_size_exp)
21470 {
21471 rtx destreg;
21472 rtx srcreg;
21473 rtx label = NULL;
21474 rtx tmp;
21475 rtx jump_around_label = NULL;
21476 HOST_WIDE_INT align = 1;
21477 unsigned HOST_WIDE_INT count = 0;
21478 HOST_WIDE_INT expected_size = -1;
21479 int size_needed = 0, epilogue_size_needed;
21480 int desired_align = 0, align_bytes = 0;
21481 enum stringop_alg alg;
21482 int dynamic_check;
21483 bool need_zero_guard = false;
21484
21485 if (CONST_INT_P (align_exp))
21486 align = INTVAL (align_exp);
21487 /* i386 can do misaligned access at a reasonably increased cost. */
21488 if (CONST_INT_P (expected_align_exp)
21489 && INTVAL (expected_align_exp) > align)
21490 align = INTVAL (expected_align_exp);
21491 /* ALIGN is the minimum of destination and source alignment, but we care here
21492 just about destination alignment. */
21493 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
21494 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
21495
21496 if (CONST_INT_P (count_exp))
21497 count = expected_size = INTVAL (count_exp);
21498 if (CONST_INT_P (expected_size_exp) && count == 0)
21499 expected_size = INTVAL (expected_size_exp);
21500
21501 /* Make sure we don't need to care about overflow later on. */
21502 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21503 return false;
21504
21505 /* Step 0: Decide on preferred algorithm, desired alignment and
21506 size of chunks to be copied by main loop. */
21507
21508 alg = decide_alg (count, expected_size, false, &dynamic_check);
21509 desired_align = decide_alignment (align, alg, expected_size);
21510
21511 if (!TARGET_ALIGN_STRINGOPS)
21512 align = desired_align;
21513
21514 if (alg == libcall)
21515 return false;
21516 gcc_assert (alg != no_stringop);
21517 if (!count)
21518 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
21519 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21520 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
21521 switch (alg)
21522 {
21523 case libcall:
21524 case no_stringop:
21525 gcc_unreachable ();
21526 case loop:
21527 need_zero_guard = true;
21528 size_needed = GET_MODE_SIZE (Pmode);
21529 break;
21530 case unrolled_loop:
21531 need_zero_guard = true;
21532 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
21533 break;
21534 case rep_prefix_8_byte:
21535 size_needed = 8;
21536 break;
21537 case rep_prefix_4_byte:
21538 size_needed = 4;
21539 break;
21540 case rep_prefix_1_byte:
21541 size_needed = 1;
21542 break;
21543 case loop_1_byte:
21544 need_zero_guard = true;
21545 size_needed = 1;
21546 break;
21547 }
21548
21549 epilogue_size_needed = size_needed;
21550
21551 /* Step 1: Prologue guard. */
21552
21553 /* Alignment code needs count to be in register. */
21554 if (CONST_INT_P (count_exp) && desired_align > align)
21555 {
21556 if (INTVAL (count_exp) > desired_align
21557 && INTVAL (count_exp) > size_needed)
21558 {
21559 align_bytes
21560 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21561 if (align_bytes <= 0)
21562 align_bytes = 0;
21563 else
21564 align_bytes = desired_align - align_bytes;
21565 }
21566 if (align_bytes == 0)
21567 count_exp = force_reg (counter_mode (count_exp), count_exp);
21568 }
21569 gcc_assert (desired_align >= 1 && align >= 1);
21570
21571 /* Ensure that alignment prologue won't copy past end of block. */
21572 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21573 {
21574 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21575 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
21576 Make sure it is a power of 2. */
21577 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21578
21579 if (count)
21580 {
21581 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21582 {
21583 /* If the main algorithm works on QImode, no epilogue is needed.
21584 For small sizes just don't align anything. */
21585 if (size_needed == 1)
21586 desired_align = align;
21587 else
21588 goto epilogue;
21589 }
21590 }
21591 else
21592 {
21593 label = gen_label_rtx ();
21594 emit_cmp_and_jump_insns (count_exp,
21595 GEN_INT (epilogue_size_needed),
21596 LTU, 0, counter_mode (count_exp), 1, label);
21597 if (expected_size == -1 || expected_size < epilogue_size_needed)
21598 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21599 else
21600 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21601 }
21602 }
21603
21604 /* Emit code to decide at runtime whether a library call or inline code
21605 should be used. */
21606 if (dynamic_check != -1)
21607 {
21608 if (CONST_INT_P (count_exp))
21609 {
21610 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
21611 {
21612 emit_block_move_via_libcall (dst, src, count_exp, false);
21613 count_exp = const0_rtx;
21614 goto epilogue;
21615 }
21616 }
21617 else
21618 {
21619 rtx hot_label = gen_label_rtx ();
21620 jump_around_label = gen_label_rtx ();
21621 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21622 LEU, 0, GET_MODE (count_exp), 1, hot_label);
21623 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21624 emit_block_move_via_libcall (dst, src, count_exp, false);
21625 emit_jump (jump_around_label);
21626 emit_label (hot_label);
21627 }
21628 }
21629
21630 /* Step 2: Alignment prologue. */
21631
21632 if (desired_align > align)
21633 {
21634 if (align_bytes == 0)
21635 {
21636 /* Except for the first move in the epilogue, we no longer know
21637 the constant offset in the aliasing info. It doesn't seem worth
21638 the pain to maintain it for the first move, so throw away
21639 the info early. */
21640 src = change_address (src, BLKmode, srcreg);
21641 dst = change_address (dst, BLKmode, destreg);
21642 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
21643 desired_align);
21644 }
21645 else
21646 {
21647 /* If we know how many bytes need to be stored before dst is
21648 sufficiently aligned, maintain aliasing info accurately. */
21649 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
21650 desired_align, align_bytes);
21651 count_exp = plus_constant (count_exp, -align_bytes);
21652 count -= align_bytes;
21653 }
21654 if (need_zero_guard
21655 && (count < (unsigned HOST_WIDE_INT) size_needed
21656 || (align_bytes == 0
21657 && count < ((unsigned HOST_WIDE_INT) size_needed
21658 + desired_align - align))))
21659 {
21660 /* It is possible that we copied enough so the main loop will not
21661 execute. */
21662 gcc_assert (size_needed > 1);
21663 if (label == NULL_RTX)
21664 label = gen_label_rtx ();
21665 emit_cmp_and_jump_insns (count_exp,
21666 GEN_INT (size_needed),
21667 LTU, 0, counter_mode (count_exp), 1, label);
21668 if (expected_size == -1
21669 || expected_size < (desired_align - align) / 2 + size_needed)
21670 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21671 else
21672 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21673 }
21674 }
21675 if (label && size_needed == 1)
21676 {
21677 emit_label (label);
21678 LABEL_NUSES (label) = 1;
21679 label = NULL;
21680 epilogue_size_needed = 1;
21681 }
21682 else if (label == NULL_RTX)
21683 epilogue_size_needed = size_needed;
21684
21685 /* Step 3: Main loop. */
21686
21687 switch (alg)
21688 {
21689 case libcall:
21690 case no_stringop:
21691 gcc_unreachable ();
21692 case loop_1_byte:
21693 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21694 count_exp, QImode, 1, expected_size);
21695 break;
21696 case loop:
21697 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21698 count_exp, Pmode, 1, expected_size);
21699 break;
21700 case unrolled_loop:
21701 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
21702 registers for 4 temporaries anyway. */
21703 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21704 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
21705 expected_size);
21706 break;
21707 case rep_prefix_8_byte:
21708 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21709 DImode);
21710 break;
21711 case rep_prefix_4_byte:
21712 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21713 SImode);
21714 break;
21715 case rep_prefix_1_byte:
21716 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21717 QImode);
21718 break;
21719 }
21720 /* Properly adjust the offsets of the src and dest memory for aliasing. */
21721 if (CONST_INT_P (count_exp))
21722 {
21723 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
21724 (count / size_needed) * size_needed);
21725 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21726 (count / size_needed) * size_needed);
21727 }
21728 else
21729 {
21730 src = change_address (src, BLKmode, srcreg);
21731 dst = change_address (dst, BLKmode, destreg);
21732 }
21733
21734 /* Step 4: Epilogue to copy the remaining bytes. */
21735 epilogue:
21736 if (label)
21737 {
21738 /* When the main loop is done, COUNT_EXP might hold the original count,
21739 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21740 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21741 bytes. Compensate if needed. */
21742
21743 if (size_needed < epilogue_size_needed)
21744 {
21745 tmp =
21746 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21747 GEN_INT (size_needed - 1), count_exp, 1,
21748 OPTAB_DIRECT);
21749 if (tmp != count_exp)
21750 emit_move_insn (count_exp, tmp);
21751 }
21752 emit_label (label);
21753 LABEL_NUSES (label) = 1;
21754 }
21755
21756 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21757 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
21758 epilogue_size_needed);
21759 if (jump_around_label)
21760 emit_label (jump_around_label);
21761 return true;
21762 }
21763
21764 /* Helper function for memset. For a QImode value 0xXY produce
21765 0xXYXYXYXY of the width specified by MODE. This is essentially
21766 a * 0x01010101, but we can do slightly better than
21767 synth_mult by unwinding the sequence by hand on CPUs with
21768 slow multiply. */
21769 static rtx
21770 promote_duplicated_reg (enum machine_mode mode, rtx val)
21771 {
21772 enum machine_mode valmode = GET_MODE (val);
21773 rtx tmp;
21774 int nops = mode == DImode ? 3 : 2;
21775
21776 gcc_assert (mode == SImode || mode == DImode);
21777 if (val == const0_rtx)
21778 return copy_to_mode_reg (mode, const0_rtx);
21779 if (CONST_INT_P (val))
21780 {
21781 HOST_WIDE_INT v = INTVAL (val) & 255;
21782
21783 v |= v << 8;
21784 v |= v << 16;
21785 if (mode == DImode)
21786 v |= (v << 16) << 16;
21787 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
21788 }
21789
21790 if (valmode == VOIDmode)
21791 valmode = QImode;
21792 if (valmode != QImode)
21793 val = gen_lowpart (QImode, val);
21794 if (mode == QImode)
21795 return val;
21796 if (!TARGET_PARTIAL_REG_STALL)
21797 nops--;
21798 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21799 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21800 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21801 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21802 {
21803 rtx reg = convert_modes (mode, QImode, val, true);
21804 tmp = promote_duplicated_reg (mode, const1_rtx);
21805 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21806 OPTAB_DIRECT);
21807 }
21808 else
21809 {
21810 rtx reg = convert_modes (mode, QImode, val, true);
21811
21812 if (!TARGET_PARTIAL_REG_STALL)
21813 if (mode == SImode)
21814 emit_insn (gen_movsi_insv_1 (reg, reg));
21815 else
21816 emit_insn (gen_movdi_insv_1 (reg, reg));
21817 else
21818 {
21819 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21820 NULL, 1, OPTAB_DIRECT);
21821 reg =
21822 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21823 }
21824 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21825 NULL, 1, OPTAB_DIRECT);
21826 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21827 if (mode == SImode)
21828 return reg;
21829 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21830 NULL, 1, OPTAB_DIRECT);
21831 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21832 return reg;
21833 }
21834 }
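/* Worked example of the promotion above (for illustration): with
   VAL == 0x5A and MODE == SImode the shift/or variant computes

       reg  = 0x0000005A
       reg |= reg << 8        ->  0x00005A5A
       reg |= reg << 16       ->  0x5A5A5A5A

   which equals 0x5A * 0x01010101, the value the multiply variant uses.    */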
21835
21836 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
21837 will be needed by the main loop copying SIZE_NEEDED chunks and by the
21838 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
21839 static rtx
21840 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21841 {
21842 rtx promoted_val;
21843
21844 if (TARGET_64BIT
21845 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21846 promoted_val = promote_duplicated_reg (DImode, val);
21847 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21848 promoted_val = promote_duplicated_reg (SImode, val);
21849 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21850 promoted_val = promote_duplicated_reg (HImode, val);
21851 else
21852 promoted_val = val;
21853
21854 return promoted_val;
21855 }
21856
21857 /* Expand a string set operation (memset/bzero). Use i386 string operations
21858 when profitable. See the expand_movmem comment for an explanation of the
21859 individual steps performed; a short overview follows below. */
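/* Roughly, the steps below are: 0) decide on the algorithm, desired alignment
   and chunk size, 1) emit a prologue guard that branches small blocks straight
   to the epilogue, 2) emit the alignment prologue and promote VAL, 3) emit the
   main loop or rep prefix sequence, and 4) emit an epilogue storing the
   remaining bytes. */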
21860 bool
21861 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21862 rtx expected_align_exp, rtx expected_size_exp)
21863 {
21864 rtx destreg;
21865 rtx label = NULL;
21866 rtx tmp;
21867 rtx jump_around_label = NULL;
21868 HOST_WIDE_INT align = 1;
21869 unsigned HOST_WIDE_INT count = 0;
21870 HOST_WIDE_INT expected_size = -1;
21871 int size_needed = 0, epilogue_size_needed;
21872 int desired_align = 0, align_bytes = 0;
21873 enum stringop_alg alg;
21874 rtx promoted_val = NULL;
21875 bool force_loopy_epilogue = false;
21876 int dynamic_check;
21877 bool need_zero_guard = false;
21878
21879 if (CONST_INT_P (align_exp))
21880 align = INTVAL (align_exp);
21881 /* i386 can do misaligned access at a reasonably increased cost. */
21882 if (CONST_INT_P (expected_align_exp)
21883 && INTVAL (expected_align_exp) > align)
21884 align = INTVAL (expected_align_exp);
21885 if (CONST_INT_P (count_exp))
21886 count = expected_size = INTVAL (count_exp);
21887 if (CONST_INT_P (expected_size_exp) && count == 0)
21888 expected_size = INTVAL (expected_size_exp);
21889
21890 /* Make sure we don't need to care about overflow later on. */
21891 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21892 return false;
21893
21894 /* Step 0: Decide on preferred algorithm, desired alignment and
21895 size of chunks to be copied by main loop. */
21896
21897 alg = decide_alg (count, expected_size, true, &dynamic_check);
21898 desired_align = decide_alignment (align, alg, expected_size);
21899
21900 if (!TARGET_ALIGN_STRINGOPS)
21901 align = desired_align;
21902
21903 if (alg == libcall)
21904 return false;
21905 gcc_assert (alg != no_stringop);
21906 if (!count)
21907 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21908 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21909 switch (alg)
21910 {
21911 case libcall:
21912 case no_stringop:
21913 gcc_unreachable ();
21914 case loop:
21915 need_zero_guard = true;
21916 size_needed = GET_MODE_SIZE (Pmode);
21917 break;
21918 case unrolled_loop:
21919 need_zero_guard = true;
21920 size_needed = GET_MODE_SIZE (Pmode) * 4;
21921 break;
21922 case rep_prefix_8_byte:
21923 size_needed = 8;
21924 break;
21925 case rep_prefix_4_byte:
21926 size_needed = 4;
21927 break;
21928 case rep_prefix_1_byte:
21929 size_needed = 1;
21930 break;
21931 case loop_1_byte:
21932 need_zero_guard = true;
21933 size_needed = 1;
21934 break;
21935 }
21936 epilogue_size_needed = size_needed;
21937
21938 /* Step 1: Prologue guard. */
21939
21940 /* Alignment code needs count to be in register. */
21941 if (CONST_INT_P (count_exp) && desired_align > align)
21942 {
21943 if (INTVAL (count_exp) > desired_align
21944 && INTVAL (count_exp) > size_needed)
21945 {
21946 align_bytes
21947 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21948 if (align_bytes <= 0)
21949 align_bytes = 0;
21950 else
21951 align_bytes = desired_align - align_bytes;
21952 }
21953 if (align_bytes == 0)
21954 {
21955 enum machine_mode mode = SImode;
21956 if (TARGET_64BIT && (count & ~0xffffffff))
21957 mode = DImode;
21958 count_exp = force_reg (mode, count_exp);
21959 }
21960 }
21961 /* Do the cheap promotion to allow better CSE across the
21962 main loop and epilogue (i.e., one load of the big constant in
21963 front of all the code). */
21964 if (CONST_INT_P (val_exp))
21965 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21966 desired_align, align);
21967 /* Ensure that alignment prologue won't copy past end of block. */
21968 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21969 {
21970 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21971 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21972 Make sure it is power of 2. */
21973 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21974
21975 /* To improve performance of small blocks, we jump around the VAL
21976 promoting code. This means that if the promoted VAL is not constant,
21977 we might not use it in the epilogue and have to use the byte
21978 loop variant. */
21979 if (epilogue_size_needed > 2 && !promoted_val)
21980 force_loopy_epilogue = true;
21981 if (count)
21982 {
21983 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21984 {
21985 /* If main algorithm works on QImode, no epilogue is needed.
21986 For small sizes just don't align anything. */
21987 if (size_needed == 1)
21988 desired_align = align;
21989 else
21990 goto epilogue;
21991 }
21992 }
21993 else
21994 {
21995 label = gen_label_rtx ();
21996 emit_cmp_and_jump_insns (count_exp,
21997 GEN_INT (epilogue_size_needed),
21998 LTU, 0, counter_mode (count_exp), 1, label);
21999 if (expected_size == -1 || expected_size <= epilogue_size_needed)
22000 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22001 else
22002 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22003 }
22004 }
22005 if (dynamic_check != -1)
22006 {
22007 rtx hot_label = gen_label_rtx ();
22008 jump_around_label = gen_label_rtx ();
22009 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22010 LEU, 0, counter_mode (count_exp), 1, hot_label);
22011 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22012 set_storage_via_libcall (dst, count_exp, val_exp, false);
22013 emit_jump (jump_around_label);
22014 emit_label (hot_label);
22015 }
22016
22017 /* Step 2: Alignment prologue. */
22018
22019 /* Do the expensive promotion once we branched off the small blocks. */
22020 if (!promoted_val)
22021 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22022 desired_align, align);
22023 gcc_assert (desired_align >= 1 && align >= 1);
22024
22025 if (desired_align > align)
22026 {
22027 if (align_bytes == 0)
22028 {
22029 /* Except for the first move in the epilogue, we no longer know
22030 the constant offset in aliasing info. It doesn't seem worth
22031 the pain to maintain it for the first move, so throw away
22032 the info early. */
22033 dst = change_address (dst, BLKmode, destreg);
22034 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
22035 desired_align);
22036 }
22037 else
22038 {
22039 /* If we know how many bytes need to be stored before dst is
22040 sufficiently aligned, maintain aliasing info accurately. */
22041 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
22042 desired_align, align_bytes);
22043 count_exp = plus_constant (count_exp, -align_bytes);
22044 count -= align_bytes;
22045 }
22046 if (need_zero_guard
22047 && (count < (unsigned HOST_WIDE_INT) size_needed
22048 || (align_bytes == 0
22049 && count < ((unsigned HOST_WIDE_INT) size_needed
22050 + desired_align - align))))
22051 {
22052 /* It is possible that we copied enough so the main loop will not
22053 execute. */
22054 gcc_assert (size_needed > 1);
22055 if (label == NULL_RTX)
22056 label = gen_label_rtx ();
22057 emit_cmp_and_jump_insns (count_exp,
22058 GEN_INT (size_needed),
22059 LTU, 0, counter_mode (count_exp), 1, label);
22060 if (expected_size == -1
22061 || expected_size < (desired_align - align) / 2 + size_needed)
22062 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22063 else
22064 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22065 }
22066 }
22067 if (label && size_needed == 1)
22068 {
22069 emit_label (label);
22070 LABEL_NUSES (label) = 1;
22071 label = NULL;
22072 promoted_val = val_exp;
22073 epilogue_size_needed = 1;
22074 }
22075 else if (label == NULL_RTX)
22076 epilogue_size_needed = size_needed;
22077
22078 /* Step 3: Main loop. */
22079
22080 switch (alg)
22081 {
22082 case libcall:
22083 case no_stringop:
22084 gcc_unreachable ();
22085 case loop_1_byte:
22086 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22087 count_exp, QImode, 1, expected_size);
22088 break;
22089 case loop:
22090 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22091 count_exp, Pmode, 1, expected_size);
22092 break;
22093 case unrolled_loop:
22094 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22095 count_exp, Pmode, 4, expected_size);
22096 break;
22097 case rep_prefix_8_byte:
22098 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22099 DImode, val_exp);
22100 break;
22101 case rep_prefix_4_byte:
22102 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22103 SImode, val_exp);
22104 break;
22105 case rep_prefix_1_byte:
22106 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22107 QImode, val_exp);
22108 break;
22109 }
22110 /* Properly adjust the offset of the destination memory for aliasing. */
22111 if (CONST_INT_P (count_exp))
22112 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22113 (count / size_needed) * size_needed);
22114 else
22115 dst = change_address (dst, BLKmode, destreg);
22116
22117 /* Step 4: Epilogue to copy the remaining bytes. */
22118
22119 if (label)
22120 {
22121 /* When the main loop is done, COUNT_EXP might hold the original count,
22122 while we want to store only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
22123 Epilogue code will actually store COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
22124 bytes. Compensate if needed. */
22125
22126 if (size_needed < epilogue_size_needed)
22127 {
22128 tmp =
22129 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22130 GEN_INT (size_needed - 1), count_exp, 1,
22131 OPTAB_DIRECT);
22132 if (tmp != count_exp)
22133 emit_move_insn (count_exp, tmp);
22134 }
22135 emit_label (label);
22136 LABEL_NUSES (label) = 1;
22137 }
22138 epilogue:
22139 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22140 {
22141 if (force_loopy_epilogue)
22142 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
22143 epilogue_size_needed);
22144 else
22145 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
22146 epilogue_size_needed);
22147 }
22148 if (jump_around_label)
22149 emit_label (jump_around_label);
22150 return true;
22151 }
22152
22153 /* Expand the appropriate insns for doing strlen if not just doing
22154 repnz; scasb
22155
22156 out = result, initialized with the start address
22157 align_rtx = alignment of the address.
22158 scratch = scratch register, initialized with the start address when
22159 not aligned, otherwise undefined
22160
22161 This is just the body. It needs the initializations mentioned above and
22162 some address computing at the end. These things are done in i386.md. */
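/* Roughly, the expansion below has two phases: a prologue that tests up to
   three leading bytes one at a time until OUT is 4-byte aligned, and a main
   loop that loads 4 bytes per iteration and uses the equivalent of
     (word - 0x01010101) & ~word & 0x80808080
   to detect a zero byte. Once the loop exits, the position of the zero byte
   within the word is found via cmov (or a short branch) followed by a
   branchless sbb-based adjustment. */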
22163
22164 static void
22165 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
22166 {
22167 int align;
22168 rtx tmp;
22169 rtx align_2_label = NULL_RTX;
22170 rtx align_3_label = NULL_RTX;
22171 rtx align_4_label = gen_label_rtx ();
22172 rtx end_0_label = gen_label_rtx ();
22173 rtx mem;
22174 rtx tmpreg = gen_reg_rtx (SImode);
22175 rtx scratch = gen_reg_rtx (SImode);
22176 rtx cmp;
22177
22178 align = 0;
22179 if (CONST_INT_P (align_rtx))
22180 align = INTVAL (align_rtx);
22181
22182 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
22183
22184 /* Is there a known alignment and is it less than 4? */
22185 if (align < 4)
22186 {
22187 rtx scratch1 = gen_reg_rtx (Pmode);
22188 emit_move_insn (scratch1, out);
22189 /* Is there a known alignment and is it not 2? */
22190 if (align != 2)
22191 {
22192 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
22193 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
22194
22195 /* Leave just the 3 lower bits. */
22196 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
22197 NULL_RTX, 0, OPTAB_WIDEN);
22198
22199 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22200 Pmode, 1, align_4_label);
22201 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
22202 Pmode, 1, align_2_label);
22203 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
22204 Pmode, 1, align_3_label);
22205 }
22206 else
22207 {
22208 /* Since the alignment is 2, we have to check 2 or 0 bytes;
22209 check whether the address is aligned to a 4-byte boundary. */
22210
22211 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
22212 NULL_RTX, 0, OPTAB_WIDEN);
22213
22214 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22215 Pmode, 1, align_4_label);
22216 }
22217
22218 mem = change_address (src, QImode, out);
22219
22220 /* Now compare the bytes. */
22221
22222 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
22223 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
22224 QImode, 1, end_0_label);
22225
22226 /* Increment the address. */
22227 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22228
22229 /* Not needed with an alignment of 2 */
22230 if (align != 2)
22231 {
22232 emit_label (align_2_label);
22233
22234 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22235 end_0_label);
22236
22237 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22238
22239 emit_label (align_3_label);
22240 }
22241
22242 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22243 end_0_label);
22244
22245 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22246 }
22247
22248 /* Generate a loop to check 4 bytes at a time. It is not a good idea
22249 to align this loop: it only makes the program larger and does not
22250 help speed. */
22251 emit_label (align_4_label);
22252
22253 mem = change_address (src, SImode, out);
22254 emit_move_insn (scratch, mem);
22255 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
22256
22257 /* This formula yields a nonzero result iff one of the bytes is zero.
22258 This saves three branches inside the loop and many cycles. */
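/* For example (illustrative values): for the word 0x12340078 the computation
   gives (0x12340078 - 0x01010101) & ~0x12340078 & 0x80808080
   = 0x1132FF77 & 0xEDCBFF87 & 0x80808080 = 0x00008000, flagging the zero
   byte, while for 0x12345678 (no zero byte) the result is 0 and the loop
   continues. */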
22259
22260 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
22261 emit_insn (gen_one_cmplsi2 (scratch, scratch));
22262 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
22263 emit_insn (gen_andsi3 (tmpreg, tmpreg,
22264 gen_int_mode (0x80808080, SImode)));
22265 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
22266 align_4_label);
22267
22268 if (TARGET_CMOVE)
22269 {
22270 rtx reg = gen_reg_rtx (SImode);
22271 rtx reg2 = gen_reg_rtx (Pmode);
22272 emit_move_insn (reg, tmpreg);
22273 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
22274
22275 /* If zero is not in the first two bytes, move two bytes forward. */
22276 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22277 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22278 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22279 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
22280 gen_rtx_IF_THEN_ELSE (SImode, tmp,
22281 reg,
22282 tmpreg)));
22283 /* Emit lea manually to avoid clobbering of flags. */
22284 emit_insn (gen_rtx_SET (SImode, reg2,
22285 gen_rtx_PLUS (Pmode, out, const2_rtx)));
22286
22287 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22288 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22289 emit_insn (gen_rtx_SET (VOIDmode, out,
22290 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
22291 reg2,
22292 out)));
22293 }
22294 else
22295 {
22296 rtx end_2_label = gen_label_rtx ();
22297 /* Is zero in the first two bytes? */
22298
22299 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22300 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22301 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
22302 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22303 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
22304 pc_rtx);
22305 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
22306 JUMP_LABEL (tmp) = end_2_label;
22307
22308 /* Not in the first two. Move two bytes forward. */
22309 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
22310 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
22311
22312 emit_label (end_2_label);
22313
22314 }
22315
22316 /* Avoid branch in fixing the byte. */
22317 tmpreg = gen_lowpart (QImode, tmpreg);
22318 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
22319 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
22320 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
22321 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
22322
22323 emit_label (end_0_label);
22324 }
22325
22326 /* Expand strlen. */
22327
22328 bool
22329 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
22330 {
22331 rtx addr, scratch1, scratch2, scratch3, scratch4;
22332
22333 /* The generic case of the strlen expander is long. Avoid expanding it
22334 unless TARGET_INLINE_ALL_STRINGOPS. */
22335
22336 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
22337 && !TARGET_INLINE_ALL_STRINGOPS
22338 && !optimize_insn_for_size_p ()
22339 && (!CONST_INT_P (align) || INTVAL (align) < 4))
22340 return false;
22341
22342 addr = force_reg (Pmode, XEXP (src, 0));
22343 scratch1 = gen_reg_rtx (Pmode);
22344
22345 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
22346 && !optimize_insn_for_size_p ())
22347 {
22348 /* Well, it seems that some optimizer does not combine a call like
22349 foo (strlen (bar), strlen (bar));
22350 when the move and the subtraction are done here. It does calculate
22351 the length just once when these instructions are done inside
22352 output_strlen_unroll(). But since &bar[strlen (bar)] is often used
22353 and this uses one fewer register for the lifetime of
22354 output_strlen_unroll(), this is better. */
22355
22356 emit_move_insn (out, addr);
22357
22358 ix86_expand_strlensi_unroll_1 (out, src, align);
22359
22360 /* strlensi_unroll_1 returns the address of the zero at the end of
22361 the string, like memchr(), so compute the length by subtracting
22362 the start address. */
22363 emit_insn (ix86_gen_sub3 (out, out, addr));
22364 }
22365 else
22366 {
22367 rtx unspec;
22368
22369 /* Can't use this if the user has appropriated eax, ecx, or edi. */
22370 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
22371 return false;
22372
22373 scratch2 = gen_reg_rtx (Pmode);
22374 scratch3 = gen_reg_rtx (Pmode);
22375 scratch4 = force_reg (Pmode, constm1_rtx);
22376
22377 emit_move_insn (scratch3, addr);
22378 eoschar = force_reg (QImode, eoschar);
22379
22380 src = replace_equiv_address_nv (src, scratch3);
22381
22382 /* If .md starts supporting :P, this can be done in .md. */
22383 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
22384 scratch4), UNSPEC_SCAS);
22385 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
22386 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
22387 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
22388 }
22389 return true;
22390 }
22391
22392 /* For a given symbol (function), construct code to compute the address of its
22393 PLT entry in the large x86-64 PIC model. */
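/* In other words, the sequence emitted is roughly
     tmp = symbol@PLTOFF;
     tmp += <PIC base register>;
   so that the returned register ends up holding the PLT entry address. */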
22394 rtx
22395 construct_plt_address (rtx symbol)
22396 {
22397 rtx tmp = gen_reg_rtx (Pmode);
22398 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
22399
22400 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
22401 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
22402
22403 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
22404 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
22405 return tmp;
22406 }
22407
22408 rtx
22409 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
22410 rtx callarg2,
22411 rtx pop, bool sibcall)
22412 {
22413 /* We need to represent that SI and DI registers are clobbered
22414 by SYSV calls. */
22415 static int clobbered_registers[] = {
22416 XMM6_REG, XMM7_REG, XMM8_REG,
22417 XMM9_REG, XMM10_REG, XMM11_REG,
22418 XMM12_REG, XMM13_REG, XMM14_REG,
22419 XMM15_REG, SI_REG, DI_REG
22420 };
22421 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
22422 rtx use = NULL, call;
22423 unsigned int vec_len;
22424
22425 if (pop == const0_rtx)
22426 pop = NULL;
22427 gcc_assert (!TARGET_64BIT || !pop);
22428
22429 if (TARGET_MACHO && !TARGET_64BIT)
22430 {
22431 #if TARGET_MACHO
22432 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
22433 fnaddr = machopic_indirect_call_target (fnaddr);
22434 #endif
22435 }
22436 else
22437 {
22438 /* Static functions and indirect calls don't need the pic register. */
22439 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
22440 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
22441 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
22442 use_reg (&use, pic_offset_table_rtx);
22443 }
22444
22445 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
22446 {
22447 rtx al = gen_rtx_REG (QImode, AX_REG);
22448 emit_move_insn (al, callarg2);
22449 use_reg (&use, al);
22450 }
22451
22452 if (ix86_cmodel == CM_LARGE_PIC
22453 && MEM_P (fnaddr)
22454 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
22455 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
22456 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
22457 else if (sibcall
22458 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
22459 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
22460 {
22461 fnaddr = XEXP (fnaddr, 0);
22462 if (GET_MODE (fnaddr) != Pmode)
22463 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
22464 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
22465 }
22466
22467 vec_len = 0;
22468 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
22469 if (retval)
22470 call = gen_rtx_SET (VOIDmode, retval, call);
22471 vec[vec_len++] = call;
22472
22473 if (pop)
22474 {
22475 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
22476 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
22477 vec[vec_len++] = pop;
22478 }
22479
22480 if (TARGET_64BIT_MS_ABI
22481 && (!callarg2 || INTVAL (callarg2) != -2))
22482 {
22483 unsigned i;
22484
22485 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
22486 UNSPEC_MS_TO_SYSV_CALL);
22487
22488 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
22489 vec[vec_len++]
22490 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
22491 ? TImode : DImode,
22492 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
22493 ? TImode : DImode,
22494 clobbered_registers[i]));
22495 }
22496
22497 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
22498 if (TARGET_VZEROUPPER)
22499 {
22500 int avx256;
22501 if (cfun->machine->callee_pass_avx256_p)
22502 {
22503 if (cfun->machine->callee_return_avx256_p)
22504 avx256 = callee_return_pass_avx256;
22505 else
22506 avx256 = callee_pass_avx256;
22507 }
22508 else if (cfun->machine->callee_return_avx256_p)
22509 avx256 = callee_return_avx256;
22510 else
22511 avx256 = call_no_avx256;
22512
22513 if (reload_completed)
22514 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
22515 else
22516 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
22517 gen_rtvec (1, GEN_INT (avx256)),
22518 UNSPEC_CALL_NEEDS_VZEROUPPER);
22519 }
22520
22521 if (vec_len > 1)
22522 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
22523 call = emit_call_insn (call);
22524 if (use)
22525 CALL_INSN_FUNCTION_USAGE (call) = use;
22526
22527 return call;
22528 }
22529
22530 void
22531 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
22532 {
22533 rtx pat = PATTERN (insn);
22534 rtvec vec = XVEC (pat, 0);
22535 int len = GET_NUM_ELEM (vec) - 1;
22536
22537 /* Strip off the last entry of the parallel. */
22538 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
22539 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
22540 if (len == 1)
22541 pat = RTVEC_ELT (vec, 0);
22542 else
22543 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
22544
22545 emit_insn (gen_avx_vzeroupper (vzeroupper));
22546 emit_call_insn (pat);
22547 }
22548
22549 /* Output the assembly for a call instruction. */
22550
22551 const char *
22552 ix86_output_call_insn (rtx insn, rtx call_op)
22553 {
22554 bool direct_p = constant_call_address_operand (call_op, Pmode);
22555 bool seh_nop_p = false;
22556 const char *xasm;
22557
22558 if (SIBLING_CALL_P (insn))
22559 {
22560 if (direct_p)
22561 xasm = "jmp\t%P0";
22562 /* SEH epilogue detection requires the indirect branch case
22563 to include REX.W. */
22564 else if (TARGET_SEH)
22565 xasm = "rex.W jmp %A0";
22566 else
22567 xasm = "jmp\t%A0";
22568
22569 output_asm_insn (xasm, &call_op);
22570 return "";
22571 }
22572
22573 /* SEH unwinding can require an extra nop to be emitted in several
22574 circumstances. Determine if we have one of those. */
22575 if (TARGET_SEH)
22576 {
22577 rtx i;
22578
22579 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
22580 {
22581 /* If we get to another real insn, we don't need the nop. */
22582 if (INSN_P (i))
22583 break;
22584
22585 /* If we get to the epilogue note, prevent a catch region from
22586 being adjacent to the standard epilogue sequence. With non-call
22587 exceptions, we'll have done this during epilogue emission. */
22588 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
22589 && !flag_non_call_exceptions
22590 && !can_throw_internal (insn))
22591 {
22592 seh_nop_p = true;
22593 break;
22594 }
22595 }
22596
22597 /* If we didn't find a real insn following the call, prevent the
22598 unwinder from looking into the next function. */
22599 if (i == NULL)
22600 seh_nop_p = true;
22601 }
22602
22603 if (direct_p)
22604 xasm = "call\t%P0";
22605 else
22606 xasm = "call\t%A0";
22607
22608 output_asm_insn (xasm, &call_op);
22609
22610 if (seh_nop_p)
22611 return "nop";
22612
22613 return "";
22614 }
22615 \f
22616 /* Clear stack slot assignments remembered from previous functions.
22617 This is called from INIT_EXPANDERS once before RTL is emitted for each
22618 function. */
22619
22620 static struct machine_function *
22621 ix86_init_machine_status (void)
22622 {
22623 struct machine_function *f;
22624
22625 f = ggc_alloc_cleared_machine_function ();
22626 f->use_fast_prologue_epilogue_nregs = -1;
22627 f->tls_descriptor_call_expanded_p = 0;
22628 f->call_abi = ix86_abi;
22629
22630 return f;
22631 }
22632
22633 /* Return a MEM corresponding to a stack slot with mode MODE.
22634 Allocate a new slot if necessary.
22635
22636 The RTL for a function can have several slots available: N is
22637 which slot to use. */
22638
22639 rtx
22640 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
22641 {
22642 struct stack_local_entry *s;
22643
22644 gcc_assert (n < MAX_386_STACK_LOCALS);
22645
22646 /* Virtual slot is valid only before vregs are instantiated. */
22647 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
22648
22649 for (s = ix86_stack_locals; s; s = s->next)
22650 if (s->mode == mode && s->n == n)
22651 return validize_mem (copy_rtx (s->rtl));
22652
22653 s = ggc_alloc_stack_local_entry ();
22654 s->n = n;
22655 s->mode = mode;
22656 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
22657
22658 s->next = ix86_stack_locals;
22659 ix86_stack_locals = s;
22660 return validize_mem (s->rtl);
22661 }
22662 \f
22663 /* Calculate the length of the memory address in the instruction encoding.
22664 Includes addr32 prefix, does not include the one-byte modrm, opcode,
22665 or other prefixes. */
22666
22667 int
22668 memory_address_length (rtx addr)
22669 {
22670 struct ix86_address parts;
22671 rtx base, index, disp;
22672 int len;
22673 int ok;
22674
22675 if (GET_CODE (addr) == PRE_DEC
22676 || GET_CODE (addr) == POST_INC
22677 || GET_CODE (addr) == PRE_MODIFY
22678 || GET_CODE (addr) == POST_MODIFY)
22679 return 0;
22680
22681 ok = ix86_decompose_address (addr, &parts);
22682 gcc_assert (ok);
22683
22684 if (parts.base && GET_CODE (parts.base) == SUBREG)
22685 parts.base = SUBREG_REG (parts.base);
22686 if (parts.index && GET_CODE (parts.index) == SUBREG)
22687 parts.index = SUBREG_REG (parts.index);
22688
22689 base = parts.base;
22690 index = parts.index;
22691 disp = parts.disp;
22692
22693 /* Add length of addr32 prefix. */
22694 len = (GET_CODE (addr) == ZERO_EXTEND
22695 || GET_CODE (addr) == AND);
22696
22697 /* Rule of thumb:
22698 - esp as the base always wants an index,
22699 - ebp as the base always wants a displacement,
22700 - r12 as the base always wants an index,
22701 - r13 as the base always wants a displacement. */
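/* For instance, a plain (%esp) or (%r12) access needs a SIB byte and hence
   the two-byte modrm form, while (%ebp) or (%r13) cannot be encoded without
   a displacement and is emitted as 0(%ebp) / 0(%r13), costing one extra
   byte. */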
22702
22703 /* Register Indirect. */
22704 if (base && !index && !disp)
22705 {
22706 /* esp (for its index) and ebp (for its displacement) need
22707 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
22708 code. */
22709 if (REG_P (addr)
22710 && (addr == arg_pointer_rtx
22711 || addr == frame_pointer_rtx
22712 || REGNO (addr) == SP_REG
22713 || REGNO (addr) == BP_REG
22714 || REGNO (addr) == R12_REG
22715 || REGNO (addr) == R13_REG))
22716 len = 1;
22717 }
22718
22719 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
22720 is not disp32, but disp32(%rip), so for disp32
22721 SIB byte is needed, unless print_operand_address
22722 optimizes it into disp32(%rip) or (%rip) is implied
22723 by UNSPEC. */
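/* For example, an absolute disp32 in 64-bit code must be encoded through a
   SIB byte (mod 00, r/m 100, base 101), one byte longer than the
   disp32(%rip) form, which is why a byte is added below for symbols that
   cannot be addressed relative to %rip. */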
22724 else if (disp && !base && !index)
22725 {
22726 len = 4;
22727 if (TARGET_64BIT)
22728 {
22729 rtx symbol = disp;
22730
22731 if (GET_CODE (disp) == CONST)
22732 symbol = XEXP (disp, 0);
22733 if (GET_CODE (symbol) == PLUS
22734 && CONST_INT_P (XEXP (symbol, 1)))
22735 symbol = XEXP (symbol, 0);
22736
22737 if (GET_CODE (symbol) != LABEL_REF
22738 && (GET_CODE (symbol) != SYMBOL_REF
22739 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
22740 && (GET_CODE (symbol) != UNSPEC
22741 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
22742 && XINT (symbol, 1) != UNSPEC_PCREL
22743 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
22744 len += 1;
22745 }
22746 }
22747
22748 else
22749 {
22750 /* Find the length of the displacement constant. */
22751 if (disp)
22752 {
22753 if (base && satisfies_constraint_K (disp))
22754 len = 1;
22755 else
22756 len = 4;
22757 }
22758 /* ebp always wants a displacement. Similarly r13. */
22759 else if (base && REG_P (base)
22760 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
22761 len = 1;
22762
22763 /* An index requires the two-byte modrm form.... */
22764 if (index
22765 /* ...like esp (or r12), which always wants an index. */
22766 || base == arg_pointer_rtx
22767 || base == frame_pointer_rtx
22768 || (base && REG_P (base)
22769 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
22770 len += 1;
22771 }
22772
22773 switch (parts.seg)
22774 {
22775 case SEG_FS:
22776 case SEG_GS:
22777 len += 1;
22778 break;
22779 default:
22780 break;
22781 }
22782
22783 return len;
22784 }
22785
22786 /* Compute default value for "length_immediate" attribute. When SHORTFORM
22787 is set, expect that the insn has an 8-bit immediate alternative. */
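/* For instance (illustrative operands), "addl $42, %eax" has a sign-extended
   imm8 alternative, so with SHORTFORM the immediate counts as one byte, while
   "addl $300, %eax" needs the full four-byte imm32 form. */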
22788 int
22789 ix86_attr_length_immediate_default (rtx insn, bool shortform)
22790 {
22791 int len = 0;
22792 int i;
22793 extract_insn_cached (insn);
22794 for (i = recog_data.n_operands - 1; i >= 0; --i)
22795 if (CONSTANT_P (recog_data.operand[i]))
22796 {
22797 enum attr_mode mode = get_attr_mode (insn);
22798
22799 gcc_assert (!len);
22800 if (shortform && CONST_INT_P (recog_data.operand[i]))
22801 {
22802 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22803 switch (mode)
22804 {
22805 case MODE_QI:
22806 len = 1;
22807 continue;
22808 case MODE_HI:
22809 ival = trunc_int_for_mode (ival, HImode);
22810 break;
22811 case MODE_SI:
22812 ival = trunc_int_for_mode (ival, SImode);
22813 break;
22814 default:
22815 break;
22816 }
22817 if (IN_RANGE (ival, -128, 127))
22818 {
22819 len = 1;
22820 continue;
22821 }
22822 }
22823 switch (mode)
22824 {
22825 case MODE_QI:
22826 len = 1;
22827 break;
22828 case MODE_HI:
22829 len = 2;
22830 break;
22831 case MODE_SI:
22832 len = 4;
22833 break;
22834 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
22835 case MODE_DI:
22836 len = 4;
22837 break;
22838 default:
22839 fatal_insn ("unknown insn mode", insn);
22840 }
22841 }
22842 return len;
22843 }
22844 /* Compute default value for "length_address" attribute. */
22845 int
22846 ix86_attr_length_address_default (rtx insn)
22847 {
22848 int i;
22849
22850 if (get_attr_type (insn) == TYPE_LEA)
22851 {
22852 rtx set = PATTERN (insn), addr;
22853
22854 if (GET_CODE (set) == PARALLEL)
22855 set = XVECEXP (set, 0, 0);
22856
22857 gcc_assert (GET_CODE (set) == SET);
22858
22859 addr = SET_SRC (set);
22860 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22861 {
22862 if (GET_CODE (addr) == ZERO_EXTEND)
22863 addr = XEXP (addr, 0);
22864 if (GET_CODE (addr) == SUBREG)
22865 addr = SUBREG_REG (addr);
22866 }
22867
22868 return memory_address_length (addr);
22869 }
22870
22871 extract_insn_cached (insn);
22872 for (i = recog_data.n_operands - 1; i >= 0; --i)
22873 if (MEM_P (recog_data.operand[i]))
22874 {
22875 constrain_operands_cached (reload_completed);
22876 if (which_alternative != -1)
22877 {
22878 const char *constraints = recog_data.constraints[i];
22879 int alt = which_alternative;
22880
22881 while (*constraints == '=' || *constraints == '+')
22882 constraints++;
22883 while (alt-- > 0)
22884 while (*constraints++ != ',')
22885 ;
22886 /* Skip ignored operands. */
22887 if (*constraints == 'X')
22888 continue;
22889 }
22890 return memory_address_length (XEXP (recog_data.operand[i], 0));
22891 }
22892 return 0;
22893 }
22894
22895 /* Compute default value for "length_vex" attribute. It includes
22896 the 2- or 3-byte VEX prefix and 1 opcode byte. */
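/* For example, an AVX insn whose operands need none of REX.W, REX.X or REX.B
   fits in the 2-byte prefix, giving a length of 2 + 1, whereas a DImode
   general-register operand or a memory operand mentioning an extended
   register forces the 3-byte prefix, giving 3 + 1. */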
22897
22898 int
22899 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
22900 {
22901 int i;
22902
22903 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit
22904 requires the 3-byte VEX prefix. */
22905 if (!has_0f_opcode || has_vex_w)
22906 return 3 + 1;
22907
22908 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
22909 if (!TARGET_64BIT)
22910 return 2 + 1;
22911
22912 extract_insn_cached (insn);
22913
22914 for (i = recog_data.n_operands - 1; i >= 0; --i)
22915 if (REG_P (recog_data.operand[i]))
22916 {
22917 /* REX.W bit uses 3 byte VEX prefix. */
22918 if (GET_MODE (recog_data.operand[i]) == DImode
22919 && GENERAL_REG_P (recog_data.operand[i]))
22920 return 3 + 1;
22921 }
22922 else
22923 {
22924 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22925 if (MEM_P (recog_data.operand[i])
22926 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
22927 return 3 + 1;
22928 }
22929
22930 return 2 + 1;
22931 }
22932 \f
22933 /* Return the maximum number of instructions a cpu can issue. */
22934
22935 static int
22936 ix86_issue_rate (void)
22937 {
22938 switch (ix86_tune)
22939 {
22940 case PROCESSOR_PENTIUM:
22941 case PROCESSOR_ATOM:
22942 case PROCESSOR_K6:
22943 return 2;
22944
22945 case PROCESSOR_PENTIUMPRO:
22946 case PROCESSOR_PENTIUM4:
22947 case PROCESSOR_CORE2_32:
22948 case PROCESSOR_CORE2_64:
22949 case PROCESSOR_COREI7_32:
22950 case PROCESSOR_COREI7_64:
22951 case PROCESSOR_ATHLON:
22952 case PROCESSOR_K8:
22953 case PROCESSOR_AMDFAM10:
22954 case PROCESSOR_NOCONA:
22955 case PROCESSOR_GENERIC32:
22956 case PROCESSOR_GENERIC64:
22957 case PROCESSOR_BDVER1:
22958 case PROCESSOR_BDVER2:
22959 case PROCESSOR_BTVER1:
22960 return 3;
22961
22962 default:
22963 return 1;
22964 }
22965 }
22966
22967 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
22968 by DEP_INSN and nothing else set by DEP_INSN. */
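/* For example, on Pentium a compare followed by a conditional jump or setcc
   that consumes only the flags satisfies this test, and ix86_adjust_cost
   then gives the pair cost 0 so they can issue together. */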
22969
22970 static bool
22971 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22972 {
22973 rtx set, set2;
22974
22975 /* Simplify the test for uninteresting insns. */
22976 if (insn_type != TYPE_SETCC
22977 && insn_type != TYPE_ICMOV
22978 && insn_type != TYPE_FCMOV
22979 && insn_type != TYPE_IBR)
22980 return false;
22981
22982 if ((set = single_set (dep_insn)) != 0)
22983 {
22984 set = SET_DEST (set);
22985 set2 = NULL_RTX;
22986 }
22987 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22988 && XVECLEN (PATTERN (dep_insn), 0) == 2
22989 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22990 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22991 {
22992 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22993 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22994 }
22995 else
22996 return false;
22997
22998 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22999 return false;
23000
23001 /* This test is true if the dependent insn reads the flags but
23002 not any other potentially set register. */
23003 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
23004 return false;
23005
23006 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
23007 return false;
23008
23009 return true;
23010 }
23011
23012 /* Return true iff USE_INSN has a memory address with operands set by
23013 SET_INSN. */
23014
23015 bool
23016 ix86_agi_dependent (rtx set_insn, rtx use_insn)
23017 {
23018 int i;
23019 extract_insn_cached (use_insn);
23020 for (i = recog_data.n_operands - 1; i >= 0; --i)
23021 if (MEM_P (recog_data.operand[i]))
23022 {
23023 rtx addr = XEXP (recog_data.operand[i], 0);
23024 return modified_in_p (addr, set_insn) != 0;
23025 }
23026 return false;
23027 }
23028
23029 static int
23030 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
23031 {
23032 enum attr_type insn_type, dep_insn_type;
23033 enum attr_memory memory;
23034 rtx set, set2;
23035 int dep_insn_code_number;
23036
23037 /* Anti and output dependencies have zero cost on all CPUs. */
23038 if (REG_NOTE_KIND (link) != 0)
23039 return 0;
23040
23041 dep_insn_code_number = recog_memoized (dep_insn);
23042
23043 /* If we can't recognize the insns, we can't really do anything. */
23044 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
23045 return cost;
23046
23047 insn_type = get_attr_type (insn);
23048 dep_insn_type = get_attr_type (dep_insn);
23049
23050 switch (ix86_tune)
23051 {
23052 case PROCESSOR_PENTIUM:
23053 /* Address Generation Interlock adds a cycle of latency. */
23054 if (insn_type == TYPE_LEA)
23055 {
23056 rtx addr = PATTERN (insn);
23057
23058 if (GET_CODE (addr) == PARALLEL)
23059 addr = XVECEXP (addr, 0, 0);
23060
23061 gcc_assert (GET_CODE (addr) == SET);
23062
23063 addr = SET_SRC (addr);
23064 if (modified_in_p (addr, dep_insn))
23065 cost += 1;
23066 }
23067 else if (ix86_agi_dependent (dep_insn, insn))
23068 cost += 1;
23069
23070 /* ??? Compares pair with jump/setcc. */
23071 if (ix86_flags_dependent (insn, dep_insn, insn_type))
23072 cost = 0;
23073
23074 /* Floating point stores require value to be ready one cycle earlier. */
23075 if (insn_type == TYPE_FMOV
23076 && get_attr_memory (insn) == MEMORY_STORE
23077 && !ix86_agi_dependent (dep_insn, insn))
23078 cost += 1;
23079 break;
23080
23081 case PROCESSOR_PENTIUMPRO:
23082 memory = get_attr_memory (insn);
23083
23084 /* INT->FP conversion is expensive. */
23085 if (get_attr_fp_int_src (dep_insn))
23086 cost += 5;
23087
23088 /* There is one cycle extra latency between an FP op and a store. */
23089 if (insn_type == TYPE_FMOV
23090 && (set = single_set (dep_insn)) != NULL_RTX
23091 && (set2 = single_set (insn)) != NULL_RTX
23092 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
23093 && MEM_P (SET_DEST (set2)))
23094 cost += 1;
23095
23096 /* Show the ability of the reorder buffer to hide the latency of a load
23097 by executing it in parallel with a previous instruction when that
23098 instruction is not needed to compute the address. */
23099 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23100 && !ix86_agi_dependent (dep_insn, insn))
23101 {
23102 /* Claim moves to take one cycle, as the core can issue one load
23103 at a time and the next load can start a cycle later. */
23104 if (dep_insn_type == TYPE_IMOV
23105 || dep_insn_type == TYPE_FMOV)
23106 cost = 1;
23107 else if (cost > 1)
23108 cost--;
23109 }
23110 break;
23111
23112 case PROCESSOR_K6:
23113 memory = get_attr_memory (insn);
23114
23115 /* The esp dependency is resolved before the instruction is really
23116 finished. */
23117 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
23118 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
23119 return 1;
23120
23121 /* INT->FP conversion is expensive. */
23122 if (get_attr_fp_int_src (dep_insn))
23123 cost += 5;
23124
23125 /* Show the ability of the reorder buffer to hide the latency of a load
23126 by executing it in parallel with a previous instruction when that
23127 instruction is not needed to compute the address. */
23128 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23129 && !ix86_agi_dependent (dep_insn, insn))
23130 {
23131 /* Claim moves to take one cycle, as the core can issue one load
23132 at a time and the next load can start a cycle later. */
23133 if (dep_insn_type == TYPE_IMOV
23134 || dep_insn_type == TYPE_FMOV)
23135 cost = 1;
23136 else if (cost > 2)
23137 cost -= 2;
23138 else
23139 cost = 1;
23140 }
23141 break;
23142
23143 case PROCESSOR_ATHLON:
23144 case PROCESSOR_K8:
23145 case PROCESSOR_AMDFAM10:
23146 case PROCESSOR_BDVER1:
23147 case PROCESSOR_BDVER2:
23148 case PROCESSOR_BTVER1:
23149 case PROCESSOR_ATOM:
23150 case PROCESSOR_GENERIC32:
23151 case PROCESSOR_GENERIC64:
23152 memory = get_attr_memory (insn);
23153
23154 /* Show the ability of the reorder buffer to hide the latency of a load
23155 by executing it in parallel with a previous instruction when that
23156 instruction is not needed to compute the address. */
23157 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23158 && !ix86_agi_dependent (dep_insn, insn))
23159 {
23160 enum attr_unit unit = get_attr_unit (insn);
23161 int loadcost = 3;
23162
23163 /* Because of the difference between the length of integer and
23164 floating unit pipeline preparation stages, the memory operands
23165 for floating point are cheaper.
23166
23167 ??? For Athlon the difference is most probably 2. */
23168 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
23169 loadcost = 3;
23170 else
23171 loadcost = TARGET_ATHLON ? 2 : 0;
23172
23173 if (cost >= loadcost)
23174 cost -= loadcost;
23175 else
23176 cost = 0;
23177 }
23178
23179 default:
23180 break;
23181 }
23182
23183 return cost;
23184 }
23185
23186 /* How many alternative schedules to try. This should be as wide as the
23187 scheduling freedom in the DFA, but no wider. Making this value too
23188 large results in extra work for the scheduler. */
23189
23190 static int
23191 ia32_multipass_dfa_lookahead (void)
23192 {
23193 switch (ix86_tune)
23194 {
23195 case PROCESSOR_PENTIUM:
23196 return 2;
23197
23198 case PROCESSOR_PENTIUMPRO:
23199 case PROCESSOR_K6:
23200 return 1;
23201
23202 case PROCESSOR_CORE2_32:
23203 case PROCESSOR_CORE2_64:
23204 case PROCESSOR_COREI7_32:
23205 case PROCESSOR_COREI7_64:
23206 /* Generally, we want haifa-sched:max_issue() to look ahead as far
23207 as the number of instructions that can be executed in one cycle,
23208 i.e., issue_rate. I wonder why tuning for many CPUs does not do this. */
23209 return ix86_issue_rate ();
23210
23211 default:
23212 return 0;
23213 }
23214 }
23215
23216 \f
23217
23218 /* Model decoder of Core 2/i7.
23219 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
23220 track the instruction fetch block boundaries and make sure that long
23221 (9+ bytes) instructions are assigned to D0. */
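/* As a rough summary of the model: the decoder reads a 16-byte fetch block
   per cycle and can decode at most 6 insns from it, and only the first
   decoder handles insns longer than 8 bytes, so the hooks below mask out
   ready insns that would exceed these limits on the current cycle. */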
23222
23223 /* Maximum length of an insn that can be handled by
23224 a secondary decoder unit. '8' for Core 2/i7. */
23225 static int core2i7_secondary_decoder_max_insn_size;
23226
23227 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
23228 '16' for Core 2/i7. */
23229 static int core2i7_ifetch_block_size;
23230
23231 /* Maximum number of instructions decoder can handle per cycle.
23232 '6' for Core 2/i7. */
23233 static int core2i7_ifetch_block_max_insns;
23234
23235 typedef struct ix86_first_cycle_multipass_data_ *
23236 ix86_first_cycle_multipass_data_t;
23237 typedef const struct ix86_first_cycle_multipass_data_ *
23238 const_ix86_first_cycle_multipass_data_t;
23239
23240 /* A variable to store target state across calls to max_issue within
23241 one cycle. */
23242 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
23243 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
23244
23245 /* Initialize DATA. */
23246 static void
23247 core2i7_first_cycle_multipass_init (void *_data)
23248 {
23249 ix86_first_cycle_multipass_data_t data
23250 = (ix86_first_cycle_multipass_data_t) _data;
23251
23252 data->ifetch_block_len = 0;
23253 data->ifetch_block_n_insns = 0;
23254 data->ready_try_change = NULL;
23255 data->ready_try_change_size = 0;
23256 }
23257
23258 /* Advancing the cycle; reset ifetch block counts. */
23259 static void
23260 core2i7_dfa_post_advance_cycle (void)
23261 {
23262 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
23263
23264 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23265
23266 data->ifetch_block_len = 0;
23267 data->ifetch_block_n_insns = 0;
23268 }
23269
23270 static int min_insn_size (rtx);
23271
23272 /* Filter out insns from ready_try that the core will not be able to issue
23273 on current cycle due to decoder. */
23274 static void
23275 core2i7_first_cycle_multipass_filter_ready_try
23276 (const_ix86_first_cycle_multipass_data_t data,
23277 char *ready_try, int n_ready, bool first_cycle_insn_p)
23278 {
23279 while (n_ready--)
23280 {
23281 rtx insn;
23282 int insn_size;
23283
23284 if (ready_try[n_ready])
23285 continue;
23286
23287 insn = get_ready_element (n_ready);
23288 insn_size = min_insn_size (insn);
23289
23290 if (/* If this insn is too long for a secondary decoder ... */
23291 (!first_cycle_insn_p
23292 && insn_size > core2i7_secondary_decoder_max_insn_size)
23293 /* ... or it would not fit into the ifetch block ... */
23294 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
23295 /* ... or the decoder is full already ... */
23296 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
23297 /* ... mask the insn out. */
23298 {
23299 ready_try[n_ready] = 1;
23300
23301 if (data->ready_try_change)
23302 SET_BIT (data->ready_try_change, n_ready);
23303 }
23304 }
23305 }
23306
23307 /* Prepare for a new round of multipass lookahead scheduling. */
23308 static void
23309 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
23310 bool first_cycle_insn_p)
23311 {
23312 ix86_first_cycle_multipass_data_t data
23313 = (ix86_first_cycle_multipass_data_t) _data;
23314 const_ix86_first_cycle_multipass_data_t prev_data
23315 = ix86_first_cycle_multipass_data;
23316
23317 /* Restore the state from the end of the previous round. */
23318 data->ifetch_block_len = prev_data->ifetch_block_len;
23319 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
23320
23321 /* Filter instructions that cannot be issued on current cycle due to
23322 decoder restrictions. */
23323 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
23324 first_cycle_insn_p);
23325 }
23326
23327 /* INSN is being issued in current solution. Account for its impact on
23328 the decoder model. */
23329 static void
23330 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
23331 rtx insn, const void *_prev_data)
23332 {
23333 ix86_first_cycle_multipass_data_t data
23334 = (ix86_first_cycle_multipass_data_t) _data;
23335 const_ix86_first_cycle_multipass_data_t prev_data
23336 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
23337
23338 int insn_size = min_insn_size (insn);
23339
23340 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
23341 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
23342 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
23343 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23344
23345 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
23346 if (!data->ready_try_change)
23347 {
23348 data->ready_try_change = sbitmap_alloc (n_ready);
23349 data->ready_try_change_size = n_ready;
23350 }
23351 else if (data->ready_try_change_size < n_ready)
23352 {
23353 data->ready_try_change = sbitmap_resize (data->ready_try_change,
23354 n_ready, 0);
23355 data->ready_try_change_size = n_ready;
23356 }
23357 sbitmap_zero (data->ready_try_change);
23358
23359 /* Filter out insns from ready_try that the core will not be able to issue
23360 on current cycle due to decoder. */
23361 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
23362 false);
23363 }
23364
23365 /* Revert the effect on ready_try. */
23366 static void
23367 core2i7_first_cycle_multipass_backtrack (const void *_data,
23368 char *ready_try,
23369 int n_ready ATTRIBUTE_UNUSED)
23370 {
23371 const_ix86_first_cycle_multipass_data_t data
23372 = (const_ix86_first_cycle_multipass_data_t) _data;
23373 unsigned int i = 0;
23374 sbitmap_iterator sbi;
23375
23376 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
23377 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
23378 {
23379 ready_try[i] = 0;
23380 }
23381 }
23382
23383 /* Save the result of multipass lookahead scheduling for the next round. */
23384 static void
23385 core2i7_first_cycle_multipass_end (const void *_data)
23386 {
23387 const_ix86_first_cycle_multipass_data_t data
23388 = (const_ix86_first_cycle_multipass_data_t) _data;
23389 ix86_first_cycle_multipass_data_t next_data
23390 = ix86_first_cycle_multipass_data;
23391
23392 if (data != NULL)
23393 {
23394 next_data->ifetch_block_len = data->ifetch_block_len;
23395 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
23396 }
23397 }
23398
23399 /* Deallocate target data. */
23400 static void
23401 core2i7_first_cycle_multipass_fini (void *_data)
23402 {
23403 ix86_first_cycle_multipass_data_t data
23404 = (ix86_first_cycle_multipass_data_t) _data;
23405
23406 if (data->ready_try_change)
23407 {
23408 sbitmap_free (data->ready_try_change);
23409 data->ready_try_change = NULL;
23410 data->ready_try_change_size = 0;
23411 }
23412 }
23413
23414 /* Prepare for scheduling pass. */
23415 static void
23416 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
23417 int verbose ATTRIBUTE_UNUSED,
23418 int max_uid ATTRIBUTE_UNUSED)
23419 {
23420 /* Install scheduling hooks for current CPU. Some of these hooks are used
23421 in time-critical parts of the scheduler, so we only set them up when
23422 they are actually used. */
23423 switch (ix86_tune)
23424 {
23425 case PROCESSOR_CORE2_32:
23426 case PROCESSOR_CORE2_64:
23427 case PROCESSOR_COREI7_32:
23428 case PROCESSOR_COREI7_64:
23429 targetm.sched.dfa_post_advance_cycle
23430 = core2i7_dfa_post_advance_cycle;
23431 targetm.sched.first_cycle_multipass_init
23432 = core2i7_first_cycle_multipass_init;
23433 targetm.sched.first_cycle_multipass_begin
23434 = core2i7_first_cycle_multipass_begin;
23435 targetm.sched.first_cycle_multipass_issue
23436 = core2i7_first_cycle_multipass_issue;
23437 targetm.sched.first_cycle_multipass_backtrack
23438 = core2i7_first_cycle_multipass_backtrack;
23439 targetm.sched.first_cycle_multipass_end
23440 = core2i7_first_cycle_multipass_end;
23441 targetm.sched.first_cycle_multipass_fini
23442 = core2i7_first_cycle_multipass_fini;
23443
23444 /* Set decoder parameters. */
23445 core2i7_secondary_decoder_max_insn_size = 8;
23446 core2i7_ifetch_block_size = 16;
23447 core2i7_ifetch_block_max_insns = 6;
23448 break;
23449
23450 default:
23451 targetm.sched.dfa_post_advance_cycle = NULL;
23452 targetm.sched.first_cycle_multipass_init = NULL;
23453 targetm.sched.first_cycle_multipass_begin = NULL;
23454 targetm.sched.first_cycle_multipass_issue = NULL;
23455 targetm.sched.first_cycle_multipass_backtrack = NULL;
23456 targetm.sched.first_cycle_multipass_end = NULL;
23457 targetm.sched.first_cycle_multipass_fini = NULL;
23458 break;
23459 }
23460 }
23461
23462 \f
23463 /* Compute the alignment given to a constant that is being placed in memory.
23464 EXP is the constant and ALIGN is the alignment that the object would
23465 ordinarily have.
23466 The value of this function is used instead of that alignment to align
23467 the object. */
23468
23469 int
23470 ix86_constant_alignment (tree exp, int align)
23471 {
23472 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
23473 || TREE_CODE (exp) == INTEGER_CST)
23474 {
23475 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
23476 return 64;
23477 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
23478 return 128;
23479 }
23480 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
23481 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
23482 return BITS_PER_WORD;
23483
23484 return align;
23485 }
23486
23487 /* Compute the alignment for a static variable.
23488 TYPE is the data type, and ALIGN is the alignment that
23489 the object would ordinarily have. The value of this function is used
23490 instead of that alignment to align the object. */
23491
23492 int
23493 ix86_data_alignment (tree type, int align)
23494 {
23495 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
23496
23497 if (AGGREGATE_TYPE_P (type)
23498 && TYPE_SIZE (type)
23499 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23500 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
23501 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
23502 && align < max_align)
23503 align = max_align;
23504
23505 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
23506 to a 16-byte boundary. */
23507 if (TARGET_64BIT)
23508 {
23509 if (AGGREGATE_TYPE_P (type)
23510 && TYPE_SIZE (type)
23511 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23512 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
23513 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23514 return 128;
23515 }
23516
23517 if (TREE_CODE (type) == ARRAY_TYPE)
23518 {
23519 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23520 return 64;
23521 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23522 return 128;
23523 }
23524 else if (TREE_CODE (type) == COMPLEX_TYPE)
23525 {
23526
23527 if (TYPE_MODE (type) == DCmode && align < 64)
23528 return 64;
23529 if ((TYPE_MODE (type) == XCmode
23530 || TYPE_MODE (type) == TCmode) && align < 128)
23531 return 128;
23532 }
23533 else if ((TREE_CODE (type) == RECORD_TYPE
23534 || TREE_CODE (type) == UNION_TYPE
23535 || TREE_CODE (type) == QUAL_UNION_TYPE)
23536 && TYPE_FIELDS (type))
23537 {
23538 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23539 return 64;
23540 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23541 return 128;
23542 }
23543 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23544 || TREE_CODE (type) == INTEGER_TYPE)
23545 {
23546 if (TYPE_MODE (type) == DFmode && align < 64)
23547 return 64;
23548 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23549 return 128;
23550 }
23551
23552 return align;
23553 }
23554
23555 /* Compute the alignment for a local variable or a stack slot. EXP is
23556 the data type or decl itself, MODE is the widest mode available and
23557 ALIGN is the alignment that the object would ordinarily have. The
23558 value of this macro is used instead of that alignment to align the
23559 object. */
23560
23561 unsigned int
23562 ix86_local_alignment (tree exp, enum machine_mode mode,
23563 unsigned int align)
23564 {
23565 tree type, decl;
23566
23567 if (exp && DECL_P (exp))
23568 {
23569 type = TREE_TYPE (exp);
23570 decl = exp;
23571 }
23572 else
23573 {
23574 type = exp;
23575 decl = NULL;
23576 }
23577
23578 /* Don't do dynamic stack realignment for long long objects with
23579 -mpreferred-stack-boundary=2. */
23580 if (!TARGET_64BIT
23581 && align == 64
23582 && ix86_preferred_stack_boundary < 64
23583 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
23584 && (!type || !TYPE_USER_ALIGN (type))
23585 && (!decl || !DECL_USER_ALIGN (decl)))
23586 align = 32;
23587
23588 /* If TYPE is NULL, we are allocating a stack slot for caller-save
23589 register in MODE. We will return the largest alignment of XF
23590 and DF. */
23591 if (!type)
23592 {
23593 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
23594 align = GET_MODE_ALIGNMENT (DFmode);
23595 return align;
23596 }
23597
23598 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
23599 to a 16-byte boundary. The exact wording is:
23600
23601 An array uses the same alignment as its elements, except that a local or
23602 global array variable of length at least 16 bytes or
23603 a C99 variable-length array variable always has alignment of at least 16 bytes.
23604
23605 This was added to allow the use of aligned SSE instructions on arrays. The
23606 rule is meant for static storage (where the compiler cannot do the analysis
23607 by itself). We follow it for automatic variables only when convenient.
23608 We fully control everything in the compiled function, and functions from
23609 other units cannot rely on the alignment.
23610 
23611 Exclude the va_list type. It is a common case of a local array where
23612 we cannot benefit from the alignment. */
23613 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
23614 && TARGET_SSE)
23615 {
23616 if (AGGREGATE_TYPE_P (type)
23617 && (va_list_type_node == NULL_TREE
23618 || (TYPE_MAIN_VARIANT (type)
23619 != TYPE_MAIN_VARIANT (va_list_type_node)))
23620 && TYPE_SIZE (type)
23621 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23622 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
23623 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23624 return 128;
23625 }
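  /* Illustrative note (not in the original source): under the rule above,
     a local array such as "double buf[4]" -- 32 bytes, so at least 16 --
     satisfies the size test and gets a 128-bit aligned stack slot, which
     lets aligned SSE loads and stores be used on it.  */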
23626 if (TREE_CODE (type) == ARRAY_TYPE)
23627 {
23628 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23629 return 64;
23630 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23631 return 128;
23632 }
23633 else if (TREE_CODE (type) == COMPLEX_TYPE)
23634 {
23635 if (TYPE_MODE (type) == DCmode && align < 64)
23636 return 64;
23637 if ((TYPE_MODE (type) == XCmode
23638 || TYPE_MODE (type) == TCmode) && align < 128)
23639 return 128;
23640 }
23641 else if ((TREE_CODE (type) == RECORD_TYPE
23642 || TREE_CODE (type) == UNION_TYPE
23643 || TREE_CODE (type) == QUAL_UNION_TYPE)
23644 && TYPE_FIELDS (type))
23645 {
23646 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23647 return 64;
23648 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23649 return 128;
23650 }
23651 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23652 || TREE_CODE (type) == INTEGER_TYPE)
23653 {
23655 if (TYPE_MODE (type) == DFmode && align < 64)
23656 return 64;
23657 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23658 return 128;
23659 }
23660 return align;
23661 }
23662
23663 /* Compute the minimum required alignment for dynamic stack realignment
23664 purposes for a local variable, parameter or a stack slot. EXP is
23665 the data type or decl itself, MODE is its mode and ALIGN is the
23666 alignment that the object would ordinarily have. */
23667
23668 unsigned int
23669 ix86_minimum_alignment (tree exp, enum machine_mode mode,
23670 unsigned int align)
23671 {
23672 tree type, decl;
23673
23674 if (exp && DECL_P (exp))
23675 {
23676 type = TREE_TYPE (exp);
23677 decl = exp;
23678 }
23679 else
23680 {
23681 type = exp;
23682 decl = NULL;
23683 }
23684
23685 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
23686 return align;
23687
23688 /* Don't do dynamic stack realignment for long long objects with
23689 -mpreferred-stack-boundary=2. */
23690 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
23691 && (!type || !TYPE_USER_ALIGN (type))
23692 && (!decl || !DECL_USER_ALIGN (decl)))
23693 return 32;
23694
23695 return align;
23696 }
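/* Illustrative note (not in the original source): with -m32 and
   -mpreferred-stack-boundary=2 the incoming stack is only guaranteed to
   be 32-bit aligned, so a local "long long" would otherwise force
   dynamic stack realignment merely to obtain its natural 64-bit
   alignment.  The two functions above let such objects fall back to
   32-bit alignment instead, unless the user explicitly requested more
   via an aligned attribute.  */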
23697 \f
23698 /* Find a location for the static chain incoming to a nested function.
23699 This is a register, unless all free registers are used by arguments. */
23700
23701 static rtx
23702 ix86_static_chain (const_tree fndecl, bool incoming_p)
23703 {
23704 unsigned regno;
23705
23706 if (!DECL_STATIC_CHAIN (fndecl))
23707 return NULL;
23708
23709 if (TARGET_64BIT)
23710 {
23711 /* We always use R10 in 64-bit mode. */
23712 regno = R10_REG;
23713 }
23714 else
23715 {
23716 tree fntype;
23717 unsigned int ccvt;
23718
23719 /* By default in 32-bit mode we use ECX to pass the static chain. */
23720 regno = CX_REG;
23721
23722 fntype = TREE_TYPE (fndecl);
23723 ccvt = ix86_get_callcvt (fntype);
23724 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
23725 {
23726 /* Fastcall functions use ecx/edx for arguments, which leaves
23727 us with EAX for the static chain.
23728 Thiscall functions use ecx for arguments, which also
23729 leaves us with EAX for the static chain. */
23730 regno = AX_REG;
23731 }
23732 else if (ix86_function_regparm (fntype, fndecl) == 3)
23733 {
23734 /* For regparm 3, we have no free call-clobbered registers in
23735 which to store the static chain. In order to implement this,
23736 we have the trampoline push the static chain to the stack.
23737 However, we can't push a value below the return address when
23738 we call the nested function directly, so we have to use an
23739 alternate entry point. For this we use ESI, and have the
23740 alternate entry point push ESI, so that things appear the
23741 same once we're executing the nested function. */
23742 if (incoming_p)
23743 {
23744 if (fndecl == current_function_decl)
23745 ix86_static_chain_on_stack = true;
23746 return gen_frame_mem (SImode,
23747 plus_constant (arg_pointer_rtx, -8));
23748 }
23749 regno = SI_REG;
23750 }
23751 }
23752
23753 return gen_rtx_REG (Pmode, regno);
23754 }
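/* Illustrative note (not in the original source): in 32-bit mode a
   nested function using the default conventions receives its static
   chain in %ecx; a fastcall or thiscall nested function (which already
   uses %ecx for arguments) receives it in %eax; and with regparm(3),
   where no call-clobbered register is free, the chain is found on the
   stack just below the return address, via the alternate entry point
   described above.  */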
23755
23756 /* Emit RTL insns to initialize the variable parts of a trampoline.
23757 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23758 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23759 to be passed to the target function. */
23760
23761 static void
23762 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
23763 {
23764 rtx mem, fnaddr;
23765 int opcode;
23766 int offset = 0;
23767
23768 fnaddr = XEXP (DECL_RTL (fndecl), 0);
23769
23770 if (TARGET_64BIT)
23771 {
23772 int size;
23773
23774 /* Load the function address into r11. Try to load the address
23775 using the shorter movl instead of movabs. We may want to support
23776 movq for kernel mode, but the kernel does not use trampolines at
23777 the moment. */
23778 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23779 {
23780 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23781
23782 mem = adjust_address (m_tramp, HImode, offset);
23783 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23784
23785 mem = adjust_address (m_tramp, SImode, offset + 2);
23786 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23787 offset += 6;
23788 }
23789 else
23790 {
23791 mem = adjust_address (m_tramp, HImode, offset);
23792 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23793
23794 mem = adjust_address (m_tramp, DImode, offset + 2);
23795 emit_move_insn (mem, fnaddr);
23796 offset += 10;
23797 }
23798
23799 /* Load the static chain into r10 using movabs. Use the
23800 shorter movl instead of movabs for x32. */
23801 if (TARGET_X32)
23802 {
23803 opcode = 0xba41;
23804 size = 6;
23805 }
23806 else
23807 {
23808 opcode = 0xba49;
23809 size = 10;
23810 }
23811
23812 mem = adjust_address (m_tramp, HImode, offset);
23813 emit_move_insn (mem, gen_int_mode (opcode, HImode));
23814
23815 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
23816 emit_move_insn (mem, chain_value);
23817 offset += size;
23818
23819 /* Jump to r11; the last (unused) byte is a nop, only there to
23820 pad the write out to a single 32-bit store. */
23821 mem = adjust_address (m_tramp, SImode, offset);
23822 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
23823 offset += 4;
23824 }
23825 else
23826 {
23827 rtx disp, chain;
23828
23829 /* Depending on the static chain location, either load a register
23830 with a constant, or push the constant to the stack. All of the
23831 instructions are the same size. */
23832 chain = ix86_static_chain (fndecl, true);
23833 if (REG_P (chain))
23834 {
23835 switch (REGNO (chain))
23836 {
23837 case AX_REG:
23838 opcode = 0xb8; break;
23839 case CX_REG:
23840 opcode = 0xb9; break;
23841 default:
23842 gcc_unreachable ();
23843 }
23844 }
23845 else
23846 opcode = 0x68;
23847
23848 mem = adjust_address (m_tramp, QImode, offset);
23849 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23850
23851 mem = adjust_address (m_tramp, SImode, offset + 1);
23852 emit_move_insn (mem, chain_value);
23853 offset += 5;
23854
23855 mem = adjust_address (m_tramp, QImode, offset);
23856 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23857
23858 mem = adjust_address (m_tramp, SImode, offset + 1);
23859
23860 /* Compute the offset from the end of the jmp to the target function.
23861 When the trampoline stores the static chain on the stack, we need
23862 to skip the first insn, which pushes the (call-saved) static chain
23863 register; this push is 1 byte. */
23864 offset += 5;
23865 disp = expand_binop (SImode, sub_optab, fnaddr,
23866 plus_constant (XEXP (m_tramp, 0),
23867 offset - (MEM_P (chain) ? 1 : 0)),
23868 NULL_RTX, 1, OPTAB_DIRECT);
23869 emit_move_insn (mem, disp);
23870 }
23871
23872 gcc_assert (offset <= TRAMPOLINE_SIZE);
23873
23874 #ifdef HAVE_ENABLE_EXECUTE_STACK
23875 #ifdef CHECK_EXECUTE_STACK_ENABLED
23876 if (CHECK_EXECUTE_STACK_ENABLED)
23877 #endif
23878 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23879 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
23880 #endif
23881 }
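/* Illustrative note (not in the original source): in its movabs form the
   64-bit trampoline written above is laid out as

	49 bb <imm64>	movabs $<fnaddr>, %r11
	49 ba <imm64>	movabs $<chain>,  %r10
	49 ff e3	jmp    *%r11
	90		nop	(pads the final 32-bit store)

   for 24 bytes in total.  The movl variants (opcodes 41 bb and 41 ba)
   are used when a zero-extended 32-bit immediate suffices, saving four
   bytes per immediate.  */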
23882 \f
23883 /* The following file contains several enumerations and data structures
23884 built from the definitions in i386-builtin-types.def. */
23885
23886 #include "i386-builtin-types.inc"
23887
23888 /* Table for the ix86 builtin non-function types. */
23889 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23890
23891 /* Retrieve an element from the above table, building some of
23892 the types lazily. */
23893
23894 static tree
23895 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23896 {
23897 unsigned int index;
23898 tree type, itype;
23899
23900 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23901
23902 type = ix86_builtin_type_tab[(int) tcode];
23903 if (type != NULL)
23904 return type;
23905
23906 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23907 if (tcode <= IX86_BT_LAST_VECT)
23908 {
23909 enum machine_mode mode;
23910
23911 index = tcode - IX86_BT_LAST_PRIM - 1;
23912 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23913 mode = ix86_builtin_type_vect_mode[index];
23914
23915 type = build_vector_type_for_mode (itype, mode);
23916 }
23917 else
23918 {
23919 int quals;
23920
23921 index = tcode - IX86_BT_LAST_VECT - 1;
23922 if (tcode <= IX86_BT_LAST_PTR)
23923 quals = TYPE_UNQUALIFIED;
23924 else
23925 quals = TYPE_QUAL_CONST;
23926
23927 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23928 if (quals != TYPE_UNQUALIFIED)
23929 itype = build_qualified_type (itype, quals);
23930
23931 type = build_pointer_type (itype);
23932 }
23933
23934 ix86_builtin_type_tab[(int) tcode] = type;
23935 return type;
23936 }
23937
23938 /* Table for the ix86 builtin function types. */
23939 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23940
23941 /* Retrieve an element from the above table, building some of
23942 the types lazily. */
23943
23944 static tree
23945 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23946 {
23947 tree type;
23948
23949 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23950
23951 type = ix86_builtin_func_type_tab[(int) tcode];
23952 if (type != NULL)
23953 return type;
23954
23955 if (tcode <= IX86_BT_LAST_FUNC)
23956 {
23957 unsigned start = ix86_builtin_func_start[(int) tcode];
23958 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23959 tree rtype, atype, args = void_list_node;
23960 unsigned i;
23961
23962 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23963 for (i = after - 1; i > start; --i)
23964 {
23965 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23966 args = tree_cons (NULL, atype, args);
23967 }
23968
23969 type = build_function_type (rtype, args);
23970 }
23971 else
23972 {
23973 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23974 enum ix86_builtin_func_type icode;
23975
23976 icode = ix86_builtin_func_alias_base[index];
23977 type = ix86_get_builtin_func_type (icode);
23978 }
23979
23980 ix86_builtin_func_type_tab[(int) tcode] = type;
23981 return type;
23982 }
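/* Illustrative note (not in the original source): for a non-alias TCODE
   whose ix86_builtin_func_args slice is, say, { V4SF, V4SF, V4SF }, the
   first slot is the return type and the loop above conses the remaining
   arguments in reverse onto void_list_node, so build_function_type
   produces the prototype "V4SF (V4SF, V4SF)".  */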
23983
23984
23985 /* Codes for all the SSE/MMX builtins. */
23986 enum ix86_builtins
23987 {
23988 IX86_BUILTIN_ADDPS,
23989 IX86_BUILTIN_ADDSS,
23990 IX86_BUILTIN_DIVPS,
23991 IX86_BUILTIN_DIVSS,
23992 IX86_BUILTIN_MULPS,
23993 IX86_BUILTIN_MULSS,
23994 IX86_BUILTIN_SUBPS,
23995 IX86_BUILTIN_SUBSS,
23996
23997 IX86_BUILTIN_CMPEQPS,
23998 IX86_BUILTIN_CMPLTPS,
23999 IX86_BUILTIN_CMPLEPS,
24000 IX86_BUILTIN_CMPGTPS,
24001 IX86_BUILTIN_CMPGEPS,
24002 IX86_BUILTIN_CMPNEQPS,
24003 IX86_BUILTIN_CMPNLTPS,
24004 IX86_BUILTIN_CMPNLEPS,
24005 IX86_BUILTIN_CMPNGTPS,
24006 IX86_BUILTIN_CMPNGEPS,
24007 IX86_BUILTIN_CMPORDPS,
24008 IX86_BUILTIN_CMPUNORDPS,
24009 IX86_BUILTIN_CMPEQSS,
24010 IX86_BUILTIN_CMPLTSS,
24011 IX86_BUILTIN_CMPLESS,
24012 IX86_BUILTIN_CMPNEQSS,
24013 IX86_BUILTIN_CMPNLTSS,
24014 IX86_BUILTIN_CMPNLESS,
24015 IX86_BUILTIN_CMPNGTSS,
24016 IX86_BUILTIN_CMPNGESS,
24017 IX86_BUILTIN_CMPORDSS,
24018 IX86_BUILTIN_CMPUNORDSS,
24019
24020 IX86_BUILTIN_COMIEQSS,
24021 IX86_BUILTIN_COMILTSS,
24022 IX86_BUILTIN_COMILESS,
24023 IX86_BUILTIN_COMIGTSS,
24024 IX86_BUILTIN_COMIGESS,
24025 IX86_BUILTIN_COMINEQSS,
24026 IX86_BUILTIN_UCOMIEQSS,
24027 IX86_BUILTIN_UCOMILTSS,
24028 IX86_BUILTIN_UCOMILESS,
24029 IX86_BUILTIN_UCOMIGTSS,
24030 IX86_BUILTIN_UCOMIGESS,
24031 IX86_BUILTIN_UCOMINEQSS,
24032
24033 IX86_BUILTIN_CVTPI2PS,
24034 IX86_BUILTIN_CVTPS2PI,
24035 IX86_BUILTIN_CVTSI2SS,
24036 IX86_BUILTIN_CVTSI642SS,
24037 IX86_BUILTIN_CVTSS2SI,
24038 IX86_BUILTIN_CVTSS2SI64,
24039 IX86_BUILTIN_CVTTPS2PI,
24040 IX86_BUILTIN_CVTTSS2SI,
24041 IX86_BUILTIN_CVTTSS2SI64,
24042
24043 IX86_BUILTIN_MAXPS,
24044 IX86_BUILTIN_MAXSS,
24045 IX86_BUILTIN_MINPS,
24046 IX86_BUILTIN_MINSS,
24047
24048 IX86_BUILTIN_LOADUPS,
24049 IX86_BUILTIN_STOREUPS,
24050 IX86_BUILTIN_MOVSS,
24051
24052 IX86_BUILTIN_MOVHLPS,
24053 IX86_BUILTIN_MOVLHPS,
24054 IX86_BUILTIN_LOADHPS,
24055 IX86_BUILTIN_LOADLPS,
24056 IX86_BUILTIN_STOREHPS,
24057 IX86_BUILTIN_STORELPS,
24058
24059 IX86_BUILTIN_MASKMOVQ,
24060 IX86_BUILTIN_MOVMSKPS,
24061 IX86_BUILTIN_PMOVMSKB,
24062
24063 IX86_BUILTIN_MOVNTPS,
24064 IX86_BUILTIN_MOVNTQ,
24065
24066 IX86_BUILTIN_LOADDQU,
24067 IX86_BUILTIN_STOREDQU,
24068
24069 IX86_BUILTIN_PACKSSWB,
24070 IX86_BUILTIN_PACKSSDW,
24071 IX86_BUILTIN_PACKUSWB,
24072
24073 IX86_BUILTIN_PADDB,
24074 IX86_BUILTIN_PADDW,
24075 IX86_BUILTIN_PADDD,
24076 IX86_BUILTIN_PADDQ,
24077 IX86_BUILTIN_PADDSB,
24078 IX86_BUILTIN_PADDSW,
24079 IX86_BUILTIN_PADDUSB,
24080 IX86_BUILTIN_PADDUSW,
24081 IX86_BUILTIN_PSUBB,
24082 IX86_BUILTIN_PSUBW,
24083 IX86_BUILTIN_PSUBD,
24084 IX86_BUILTIN_PSUBQ,
24085 IX86_BUILTIN_PSUBSB,
24086 IX86_BUILTIN_PSUBSW,
24087 IX86_BUILTIN_PSUBUSB,
24088 IX86_BUILTIN_PSUBUSW,
24089
24090 IX86_BUILTIN_PAND,
24091 IX86_BUILTIN_PANDN,
24092 IX86_BUILTIN_POR,
24093 IX86_BUILTIN_PXOR,
24094
24095 IX86_BUILTIN_PAVGB,
24096 IX86_BUILTIN_PAVGW,
24097
24098 IX86_BUILTIN_PCMPEQB,
24099 IX86_BUILTIN_PCMPEQW,
24100 IX86_BUILTIN_PCMPEQD,
24101 IX86_BUILTIN_PCMPGTB,
24102 IX86_BUILTIN_PCMPGTW,
24103 IX86_BUILTIN_PCMPGTD,
24104
24105 IX86_BUILTIN_PMADDWD,
24106
24107 IX86_BUILTIN_PMAXSW,
24108 IX86_BUILTIN_PMAXUB,
24109 IX86_BUILTIN_PMINSW,
24110 IX86_BUILTIN_PMINUB,
24111
24112 IX86_BUILTIN_PMULHUW,
24113 IX86_BUILTIN_PMULHW,
24114 IX86_BUILTIN_PMULLW,
24115
24116 IX86_BUILTIN_PSADBW,
24117 IX86_BUILTIN_PSHUFW,
24118
24119 IX86_BUILTIN_PSLLW,
24120 IX86_BUILTIN_PSLLD,
24121 IX86_BUILTIN_PSLLQ,
24122 IX86_BUILTIN_PSRAW,
24123 IX86_BUILTIN_PSRAD,
24124 IX86_BUILTIN_PSRLW,
24125 IX86_BUILTIN_PSRLD,
24126 IX86_BUILTIN_PSRLQ,
24127 IX86_BUILTIN_PSLLWI,
24128 IX86_BUILTIN_PSLLDI,
24129 IX86_BUILTIN_PSLLQI,
24130 IX86_BUILTIN_PSRAWI,
24131 IX86_BUILTIN_PSRADI,
24132 IX86_BUILTIN_PSRLWI,
24133 IX86_BUILTIN_PSRLDI,
24134 IX86_BUILTIN_PSRLQI,
24135
24136 IX86_BUILTIN_PUNPCKHBW,
24137 IX86_BUILTIN_PUNPCKHWD,
24138 IX86_BUILTIN_PUNPCKHDQ,
24139 IX86_BUILTIN_PUNPCKLBW,
24140 IX86_BUILTIN_PUNPCKLWD,
24141 IX86_BUILTIN_PUNPCKLDQ,
24142
24143 IX86_BUILTIN_SHUFPS,
24144
24145 IX86_BUILTIN_RCPPS,
24146 IX86_BUILTIN_RCPSS,
24147 IX86_BUILTIN_RSQRTPS,
24148 IX86_BUILTIN_RSQRTPS_NR,
24149 IX86_BUILTIN_RSQRTSS,
24150 IX86_BUILTIN_RSQRTF,
24151 IX86_BUILTIN_SQRTPS,
24152 IX86_BUILTIN_SQRTPS_NR,
24153 IX86_BUILTIN_SQRTSS,
24154
24155 IX86_BUILTIN_UNPCKHPS,
24156 IX86_BUILTIN_UNPCKLPS,
24157
24158 IX86_BUILTIN_ANDPS,
24159 IX86_BUILTIN_ANDNPS,
24160 IX86_BUILTIN_ORPS,
24161 IX86_BUILTIN_XORPS,
24162
24163 IX86_BUILTIN_EMMS,
24164 IX86_BUILTIN_LDMXCSR,
24165 IX86_BUILTIN_STMXCSR,
24166 IX86_BUILTIN_SFENCE,
24167
24168 /* 3DNow! Original */
24169 IX86_BUILTIN_FEMMS,
24170 IX86_BUILTIN_PAVGUSB,
24171 IX86_BUILTIN_PF2ID,
24172 IX86_BUILTIN_PFACC,
24173 IX86_BUILTIN_PFADD,
24174 IX86_BUILTIN_PFCMPEQ,
24175 IX86_BUILTIN_PFCMPGE,
24176 IX86_BUILTIN_PFCMPGT,
24177 IX86_BUILTIN_PFMAX,
24178 IX86_BUILTIN_PFMIN,
24179 IX86_BUILTIN_PFMUL,
24180 IX86_BUILTIN_PFRCP,
24181 IX86_BUILTIN_PFRCPIT1,
24182 IX86_BUILTIN_PFRCPIT2,
24183 IX86_BUILTIN_PFRSQIT1,
24184 IX86_BUILTIN_PFRSQRT,
24185 IX86_BUILTIN_PFSUB,
24186 IX86_BUILTIN_PFSUBR,
24187 IX86_BUILTIN_PI2FD,
24188 IX86_BUILTIN_PMULHRW,
24189
24190 /* 3DNow! Athlon Extensions */
24191 IX86_BUILTIN_PF2IW,
24192 IX86_BUILTIN_PFNACC,
24193 IX86_BUILTIN_PFPNACC,
24194 IX86_BUILTIN_PI2FW,
24195 IX86_BUILTIN_PSWAPDSI,
24196 IX86_BUILTIN_PSWAPDSF,
24197
24198 /* SSE2 */
24199 IX86_BUILTIN_ADDPD,
24200 IX86_BUILTIN_ADDSD,
24201 IX86_BUILTIN_DIVPD,
24202 IX86_BUILTIN_DIVSD,
24203 IX86_BUILTIN_MULPD,
24204 IX86_BUILTIN_MULSD,
24205 IX86_BUILTIN_SUBPD,
24206 IX86_BUILTIN_SUBSD,
24207
24208 IX86_BUILTIN_CMPEQPD,
24209 IX86_BUILTIN_CMPLTPD,
24210 IX86_BUILTIN_CMPLEPD,
24211 IX86_BUILTIN_CMPGTPD,
24212 IX86_BUILTIN_CMPGEPD,
24213 IX86_BUILTIN_CMPNEQPD,
24214 IX86_BUILTIN_CMPNLTPD,
24215 IX86_BUILTIN_CMPNLEPD,
24216 IX86_BUILTIN_CMPNGTPD,
24217 IX86_BUILTIN_CMPNGEPD,
24218 IX86_BUILTIN_CMPORDPD,
24219 IX86_BUILTIN_CMPUNORDPD,
24220 IX86_BUILTIN_CMPEQSD,
24221 IX86_BUILTIN_CMPLTSD,
24222 IX86_BUILTIN_CMPLESD,
24223 IX86_BUILTIN_CMPNEQSD,
24224 IX86_BUILTIN_CMPNLTSD,
24225 IX86_BUILTIN_CMPNLESD,
24226 IX86_BUILTIN_CMPORDSD,
24227 IX86_BUILTIN_CMPUNORDSD,
24228
24229 IX86_BUILTIN_COMIEQSD,
24230 IX86_BUILTIN_COMILTSD,
24231 IX86_BUILTIN_COMILESD,
24232 IX86_BUILTIN_COMIGTSD,
24233 IX86_BUILTIN_COMIGESD,
24234 IX86_BUILTIN_COMINEQSD,
24235 IX86_BUILTIN_UCOMIEQSD,
24236 IX86_BUILTIN_UCOMILTSD,
24237 IX86_BUILTIN_UCOMILESD,
24238 IX86_BUILTIN_UCOMIGTSD,
24239 IX86_BUILTIN_UCOMIGESD,
24240 IX86_BUILTIN_UCOMINEQSD,
24241
24242 IX86_BUILTIN_MAXPD,
24243 IX86_BUILTIN_MAXSD,
24244 IX86_BUILTIN_MINPD,
24245 IX86_BUILTIN_MINSD,
24246
24247 IX86_BUILTIN_ANDPD,
24248 IX86_BUILTIN_ANDNPD,
24249 IX86_BUILTIN_ORPD,
24250 IX86_BUILTIN_XORPD,
24251
24252 IX86_BUILTIN_SQRTPD,
24253 IX86_BUILTIN_SQRTSD,
24254
24255 IX86_BUILTIN_UNPCKHPD,
24256 IX86_BUILTIN_UNPCKLPD,
24257
24258 IX86_BUILTIN_SHUFPD,
24259
24260 IX86_BUILTIN_LOADUPD,
24261 IX86_BUILTIN_STOREUPD,
24262 IX86_BUILTIN_MOVSD,
24263
24264 IX86_BUILTIN_LOADHPD,
24265 IX86_BUILTIN_LOADLPD,
24266
24267 IX86_BUILTIN_CVTDQ2PD,
24268 IX86_BUILTIN_CVTDQ2PS,
24269
24270 IX86_BUILTIN_CVTPD2DQ,
24271 IX86_BUILTIN_CVTPD2PI,
24272 IX86_BUILTIN_CVTPD2PS,
24273 IX86_BUILTIN_CVTTPD2DQ,
24274 IX86_BUILTIN_CVTTPD2PI,
24275
24276 IX86_BUILTIN_CVTPI2PD,
24277 IX86_BUILTIN_CVTSI2SD,
24278 IX86_BUILTIN_CVTSI642SD,
24279
24280 IX86_BUILTIN_CVTSD2SI,
24281 IX86_BUILTIN_CVTSD2SI64,
24282 IX86_BUILTIN_CVTSD2SS,
24283 IX86_BUILTIN_CVTSS2SD,
24284 IX86_BUILTIN_CVTTSD2SI,
24285 IX86_BUILTIN_CVTTSD2SI64,
24286
24287 IX86_BUILTIN_CVTPS2DQ,
24288 IX86_BUILTIN_CVTPS2PD,
24289 IX86_BUILTIN_CVTTPS2DQ,
24290
24291 IX86_BUILTIN_MOVNTI,
24292 IX86_BUILTIN_MOVNTPD,
24293 IX86_BUILTIN_MOVNTDQ,
24294
24295 IX86_BUILTIN_MOVQ128,
24296
24297 /* SSE2 MMX */
24298 IX86_BUILTIN_MASKMOVDQU,
24299 IX86_BUILTIN_MOVMSKPD,
24300 IX86_BUILTIN_PMOVMSKB128,
24301
24302 IX86_BUILTIN_PACKSSWB128,
24303 IX86_BUILTIN_PACKSSDW128,
24304 IX86_BUILTIN_PACKUSWB128,
24305
24306 IX86_BUILTIN_PADDB128,
24307 IX86_BUILTIN_PADDW128,
24308 IX86_BUILTIN_PADDD128,
24309 IX86_BUILTIN_PADDQ128,
24310 IX86_BUILTIN_PADDSB128,
24311 IX86_BUILTIN_PADDSW128,
24312 IX86_BUILTIN_PADDUSB128,
24313 IX86_BUILTIN_PADDUSW128,
24314 IX86_BUILTIN_PSUBB128,
24315 IX86_BUILTIN_PSUBW128,
24316 IX86_BUILTIN_PSUBD128,
24317 IX86_BUILTIN_PSUBQ128,
24318 IX86_BUILTIN_PSUBSB128,
24319 IX86_BUILTIN_PSUBSW128,
24320 IX86_BUILTIN_PSUBUSB128,
24321 IX86_BUILTIN_PSUBUSW128,
24322
24323 IX86_BUILTIN_PAND128,
24324 IX86_BUILTIN_PANDN128,
24325 IX86_BUILTIN_POR128,
24326 IX86_BUILTIN_PXOR128,
24327
24328 IX86_BUILTIN_PAVGB128,
24329 IX86_BUILTIN_PAVGW128,
24330
24331 IX86_BUILTIN_PCMPEQB128,
24332 IX86_BUILTIN_PCMPEQW128,
24333 IX86_BUILTIN_PCMPEQD128,
24334 IX86_BUILTIN_PCMPGTB128,
24335 IX86_BUILTIN_PCMPGTW128,
24336 IX86_BUILTIN_PCMPGTD128,
24337
24338 IX86_BUILTIN_PMADDWD128,
24339
24340 IX86_BUILTIN_PMAXSW128,
24341 IX86_BUILTIN_PMAXUB128,
24342 IX86_BUILTIN_PMINSW128,
24343 IX86_BUILTIN_PMINUB128,
24344
24345 IX86_BUILTIN_PMULUDQ,
24346 IX86_BUILTIN_PMULUDQ128,
24347 IX86_BUILTIN_PMULHUW128,
24348 IX86_BUILTIN_PMULHW128,
24349 IX86_BUILTIN_PMULLW128,
24350
24351 IX86_BUILTIN_PSADBW128,
24352 IX86_BUILTIN_PSHUFHW,
24353 IX86_BUILTIN_PSHUFLW,
24354 IX86_BUILTIN_PSHUFD,
24355
24356 IX86_BUILTIN_PSLLDQI128,
24357 IX86_BUILTIN_PSLLWI128,
24358 IX86_BUILTIN_PSLLDI128,
24359 IX86_BUILTIN_PSLLQI128,
24360 IX86_BUILTIN_PSRAWI128,
24361 IX86_BUILTIN_PSRADI128,
24362 IX86_BUILTIN_PSRLDQI128,
24363 IX86_BUILTIN_PSRLWI128,
24364 IX86_BUILTIN_PSRLDI128,
24365 IX86_BUILTIN_PSRLQI128,
24366
24367 IX86_BUILTIN_PSLLDQ128,
24368 IX86_BUILTIN_PSLLW128,
24369 IX86_BUILTIN_PSLLD128,
24370 IX86_BUILTIN_PSLLQ128,
24371 IX86_BUILTIN_PSRAW128,
24372 IX86_BUILTIN_PSRAD128,
24373 IX86_BUILTIN_PSRLW128,
24374 IX86_BUILTIN_PSRLD128,
24375 IX86_BUILTIN_PSRLQ128,
24376
24377 IX86_BUILTIN_PUNPCKHBW128,
24378 IX86_BUILTIN_PUNPCKHWD128,
24379 IX86_BUILTIN_PUNPCKHDQ128,
24380 IX86_BUILTIN_PUNPCKHQDQ128,
24381 IX86_BUILTIN_PUNPCKLBW128,
24382 IX86_BUILTIN_PUNPCKLWD128,
24383 IX86_BUILTIN_PUNPCKLDQ128,
24384 IX86_BUILTIN_PUNPCKLQDQ128,
24385
24386 IX86_BUILTIN_CLFLUSH,
24387 IX86_BUILTIN_MFENCE,
24388 IX86_BUILTIN_LFENCE,
24389 IX86_BUILTIN_PAUSE,
24390
24391 IX86_BUILTIN_BSRSI,
24392 IX86_BUILTIN_BSRDI,
24393 IX86_BUILTIN_RDPMC,
24394 IX86_BUILTIN_RDTSC,
24395 IX86_BUILTIN_RDTSCP,
24396 IX86_BUILTIN_ROLQI,
24397 IX86_BUILTIN_ROLHI,
24398 IX86_BUILTIN_RORQI,
24399 IX86_BUILTIN_RORHI,
24400
24401 /* SSE3. */
24402 IX86_BUILTIN_ADDSUBPS,
24403 IX86_BUILTIN_HADDPS,
24404 IX86_BUILTIN_HSUBPS,
24405 IX86_BUILTIN_MOVSHDUP,
24406 IX86_BUILTIN_MOVSLDUP,
24407 IX86_BUILTIN_ADDSUBPD,
24408 IX86_BUILTIN_HADDPD,
24409 IX86_BUILTIN_HSUBPD,
24410 IX86_BUILTIN_LDDQU,
24411
24412 IX86_BUILTIN_MONITOR,
24413 IX86_BUILTIN_MWAIT,
24414
24415 /* SSSE3. */
24416 IX86_BUILTIN_PHADDW,
24417 IX86_BUILTIN_PHADDD,
24418 IX86_BUILTIN_PHADDSW,
24419 IX86_BUILTIN_PHSUBW,
24420 IX86_BUILTIN_PHSUBD,
24421 IX86_BUILTIN_PHSUBSW,
24422 IX86_BUILTIN_PMADDUBSW,
24423 IX86_BUILTIN_PMULHRSW,
24424 IX86_BUILTIN_PSHUFB,
24425 IX86_BUILTIN_PSIGNB,
24426 IX86_BUILTIN_PSIGNW,
24427 IX86_BUILTIN_PSIGND,
24428 IX86_BUILTIN_PALIGNR,
24429 IX86_BUILTIN_PABSB,
24430 IX86_BUILTIN_PABSW,
24431 IX86_BUILTIN_PABSD,
24432
24433 IX86_BUILTIN_PHADDW128,
24434 IX86_BUILTIN_PHADDD128,
24435 IX86_BUILTIN_PHADDSW128,
24436 IX86_BUILTIN_PHSUBW128,
24437 IX86_BUILTIN_PHSUBD128,
24438 IX86_BUILTIN_PHSUBSW128,
24439 IX86_BUILTIN_PMADDUBSW128,
24440 IX86_BUILTIN_PMULHRSW128,
24441 IX86_BUILTIN_PSHUFB128,
24442 IX86_BUILTIN_PSIGNB128,
24443 IX86_BUILTIN_PSIGNW128,
24444 IX86_BUILTIN_PSIGND128,
24445 IX86_BUILTIN_PALIGNR128,
24446 IX86_BUILTIN_PABSB128,
24447 IX86_BUILTIN_PABSW128,
24448 IX86_BUILTIN_PABSD128,
24449
24450 /* AMDFAM10 - SSE4A New Instructions. */
24451 IX86_BUILTIN_MOVNTSD,
24452 IX86_BUILTIN_MOVNTSS,
24453 IX86_BUILTIN_EXTRQI,
24454 IX86_BUILTIN_EXTRQ,
24455 IX86_BUILTIN_INSERTQI,
24456 IX86_BUILTIN_INSERTQ,
24457
24458 /* SSE4.1. */
24459 IX86_BUILTIN_BLENDPD,
24460 IX86_BUILTIN_BLENDPS,
24461 IX86_BUILTIN_BLENDVPD,
24462 IX86_BUILTIN_BLENDVPS,
24463 IX86_BUILTIN_PBLENDVB128,
24464 IX86_BUILTIN_PBLENDW128,
24465
24466 IX86_BUILTIN_DPPD,
24467 IX86_BUILTIN_DPPS,
24468
24469 IX86_BUILTIN_INSERTPS128,
24470
24471 IX86_BUILTIN_MOVNTDQA,
24472 IX86_BUILTIN_MPSADBW128,
24473 IX86_BUILTIN_PACKUSDW128,
24474 IX86_BUILTIN_PCMPEQQ,
24475 IX86_BUILTIN_PHMINPOSUW128,
24476
24477 IX86_BUILTIN_PMAXSB128,
24478 IX86_BUILTIN_PMAXSD128,
24479 IX86_BUILTIN_PMAXUD128,
24480 IX86_BUILTIN_PMAXUW128,
24481
24482 IX86_BUILTIN_PMINSB128,
24483 IX86_BUILTIN_PMINSD128,
24484 IX86_BUILTIN_PMINUD128,
24485 IX86_BUILTIN_PMINUW128,
24486
24487 IX86_BUILTIN_PMOVSXBW128,
24488 IX86_BUILTIN_PMOVSXBD128,
24489 IX86_BUILTIN_PMOVSXBQ128,
24490 IX86_BUILTIN_PMOVSXWD128,
24491 IX86_BUILTIN_PMOVSXWQ128,
24492 IX86_BUILTIN_PMOVSXDQ128,
24493
24494 IX86_BUILTIN_PMOVZXBW128,
24495 IX86_BUILTIN_PMOVZXBD128,
24496 IX86_BUILTIN_PMOVZXBQ128,
24497 IX86_BUILTIN_PMOVZXWD128,
24498 IX86_BUILTIN_PMOVZXWQ128,
24499 IX86_BUILTIN_PMOVZXDQ128,
24500
24501 IX86_BUILTIN_PMULDQ128,
24502 IX86_BUILTIN_PMULLD128,
24503
24504 IX86_BUILTIN_ROUNDPD,
24505 IX86_BUILTIN_ROUNDPS,
24506 IX86_BUILTIN_ROUNDSD,
24507 IX86_BUILTIN_ROUNDSS,
24508
24509 IX86_BUILTIN_FLOORPD,
24510 IX86_BUILTIN_CEILPD,
24511 IX86_BUILTIN_TRUNCPD,
24512 IX86_BUILTIN_RINTPD,
24513 IX86_BUILTIN_ROUNDPD_AZ,
24514 IX86_BUILTIN_FLOORPS,
24515 IX86_BUILTIN_CEILPS,
24516 IX86_BUILTIN_TRUNCPS,
24517 IX86_BUILTIN_RINTPS,
24518 IX86_BUILTIN_ROUNDPS_AZ,
24519
24520 IX86_BUILTIN_PTESTZ,
24521 IX86_BUILTIN_PTESTC,
24522 IX86_BUILTIN_PTESTNZC,
24523
24524 IX86_BUILTIN_VEC_INIT_V2SI,
24525 IX86_BUILTIN_VEC_INIT_V4HI,
24526 IX86_BUILTIN_VEC_INIT_V8QI,
24527 IX86_BUILTIN_VEC_EXT_V2DF,
24528 IX86_BUILTIN_VEC_EXT_V2DI,
24529 IX86_BUILTIN_VEC_EXT_V4SF,
24530 IX86_BUILTIN_VEC_EXT_V4SI,
24531 IX86_BUILTIN_VEC_EXT_V8HI,
24532 IX86_BUILTIN_VEC_EXT_V2SI,
24533 IX86_BUILTIN_VEC_EXT_V4HI,
24534 IX86_BUILTIN_VEC_EXT_V16QI,
24535 IX86_BUILTIN_VEC_SET_V2DI,
24536 IX86_BUILTIN_VEC_SET_V4SF,
24537 IX86_BUILTIN_VEC_SET_V4SI,
24538 IX86_BUILTIN_VEC_SET_V8HI,
24539 IX86_BUILTIN_VEC_SET_V4HI,
24540 IX86_BUILTIN_VEC_SET_V16QI,
24541
24542 IX86_BUILTIN_VEC_PACK_SFIX,
24543
24544 /* SSE4.2. */
24545 IX86_BUILTIN_CRC32QI,
24546 IX86_BUILTIN_CRC32HI,
24547 IX86_BUILTIN_CRC32SI,
24548 IX86_BUILTIN_CRC32DI,
24549
24550 IX86_BUILTIN_PCMPESTRI128,
24551 IX86_BUILTIN_PCMPESTRM128,
24552 IX86_BUILTIN_PCMPESTRA128,
24553 IX86_BUILTIN_PCMPESTRC128,
24554 IX86_BUILTIN_PCMPESTRO128,
24555 IX86_BUILTIN_PCMPESTRS128,
24556 IX86_BUILTIN_PCMPESTRZ128,
24557 IX86_BUILTIN_PCMPISTRI128,
24558 IX86_BUILTIN_PCMPISTRM128,
24559 IX86_BUILTIN_PCMPISTRA128,
24560 IX86_BUILTIN_PCMPISTRC128,
24561 IX86_BUILTIN_PCMPISTRO128,
24562 IX86_BUILTIN_PCMPISTRS128,
24563 IX86_BUILTIN_PCMPISTRZ128,
24564
24565 IX86_BUILTIN_PCMPGTQ,
24566
24567 /* AES instructions */
24568 IX86_BUILTIN_AESENC128,
24569 IX86_BUILTIN_AESENCLAST128,
24570 IX86_BUILTIN_AESDEC128,
24571 IX86_BUILTIN_AESDECLAST128,
24572 IX86_BUILTIN_AESIMC128,
24573 IX86_BUILTIN_AESKEYGENASSIST128,
24574
24575 /* PCLMUL instruction */
24576 IX86_BUILTIN_PCLMULQDQ128,
24577
24578 /* AVX */
24579 IX86_BUILTIN_ADDPD256,
24580 IX86_BUILTIN_ADDPS256,
24581 IX86_BUILTIN_ADDSUBPD256,
24582 IX86_BUILTIN_ADDSUBPS256,
24583 IX86_BUILTIN_ANDPD256,
24584 IX86_BUILTIN_ANDPS256,
24585 IX86_BUILTIN_ANDNPD256,
24586 IX86_BUILTIN_ANDNPS256,
24587 IX86_BUILTIN_BLENDPD256,
24588 IX86_BUILTIN_BLENDPS256,
24589 IX86_BUILTIN_BLENDVPD256,
24590 IX86_BUILTIN_BLENDVPS256,
24591 IX86_BUILTIN_DIVPD256,
24592 IX86_BUILTIN_DIVPS256,
24593 IX86_BUILTIN_DPPS256,
24594 IX86_BUILTIN_HADDPD256,
24595 IX86_BUILTIN_HADDPS256,
24596 IX86_BUILTIN_HSUBPD256,
24597 IX86_BUILTIN_HSUBPS256,
24598 IX86_BUILTIN_MAXPD256,
24599 IX86_BUILTIN_MAXPS256,
24600 IX86_BUILTIN_MINPD256,
24601 IX86_BUILTIN_MINPS256,
24602 IX86_BUILTIN_MULPD256,
24603 IX86_BUILTIN_MULPS256,
24604 IX86_BUILTIN_ORPD256,
24605 IX86_BUILTIN_ORPS256,
24606 IX86_BUILTIN_SHUFPD256,
24607 IX86_BUILTIN_SHUFPS256,
24608 IX86_BUILTIN_SUBPD256,
24609 IX86_BUILTIN_SUBPS256,
24610 IX86_BUILTIN_XORPD256,
24611 IX86_BUILTIN_XORPS256,
24612 IX86_BUILTIN_CMPSD,
24613 IX86_BUILTIN_CMPSS,
24614 IX86_BUILTIN_CMPPD,
24615 IX86_BUILTIN_CMPPS,
24616 IX86_BUILTIN_CMPPD256,
24617 IX86_BUILTIN_CMPPS256,
24618 IX86_BUILTIN_CVTDQ2PD256,
24619 IX86_BUILTIN_CVTDQ2PS256,
24620 IX86_BUILTIN_CVTPD2PS256,
24621 IX86_BUILTIN_CVTPS2DQ256,
24622 IX86_BUILTIN_CVTPS2PD256,
24623 IX86_BUILTIN_CVTTPD2DQ256,
24624 IX86_BUILTIN_CVTPD2DQ256,
24625 IX86_BUILTIN_CVTTPS2DQ256,
24626 IX86_BUILTIN_EXTRACTF128PD256,
24627 IX86_BUILTIN_EXTRACTF128PS256,
24628 IX86_BUILTIN_EXTRACTF128SI256,
24629 IX86_BUILTIN_VZEROALL,
24630 IX86_BUILTIN_VZEROUPPER,
24631 IX86_BUILTIN_VPERMILVARPD,
24632 IX86_BUILTIN_VPERMILVARPS,
24633 IX86_BUILTIN_VPERMILVARPD256,
24634 IX86_BUILTIN_VPERMILVARPS256,
24635 IX86_BUILTIN_VPERMILPD,
24636 IX86_BUILTIN_VPERMILPS,
24637 IX86_BUILTIN_VPERMILPD256,
24638 IX86_BUILTIN_VPERMILPS256,
24639 IX86_BUILTIN_VPERMIL2PD,
24640 IX86_BUILTIN_VPERMIL2PS,
24641 IX86_BUILTIN_VPERMIL2PD256,
24642 IX86_BUILTIN_VPERMIL2PS256,
24643 IX86_BUILTIN_VPERM2F128PD256,
24644 IX86_BUILTIN_VPERM2F128PS256,
24645 IX86_BUILTIN_VPERM2F128SI256,
24646 IX86_BUILTIN_VBROADCASTSS,
24647 IX86_BUILTIN_VBROADCASTSD256,
24648 IX86_BUILTIN_VBROADCASTSS256,
24649 IX86_BUILTIN_VBROADCASTPD256,
24650 IX86_BUILTIN_VBROADCASTPS256,
24651 IX86_BUILTIN_VINSERTF128PD256,
24652 IX86_BUILTIN_VINSERTF128PS256,
24653 IX86_BUILTIN_VINSERTF128SI256,
24654 IX86_BUILTIN_LOADUPD256,
24655 IX86_BUILTIN_LOADUPS256,
24656 IX86_BUILTIN_STOREUPD256,
24657 IX86_BUILTIN_STOREUPS256,
24658 IX86_BUILTIN_LDDQU256,
24659 IX86_BUILTIN_MOVNTDQ256,
24660 IX86_BUILTIN_MOVNTPD256,
24661 IX86_BUILTIN_MOVNTPS256,
24662 IX86_BUILTIN_LOADDQU256,
24663 IX86_BUILTIN_STOREDQU256,
24664 IX86_BUILTIN_MASKLOADPD,
24665 IX86_BUILTIN_MASKLOADPS,
24666 IX86_BUILTIN_MASKSTOREPD,
24667 IX86_BUILTIN_MASKSTOREPS,
24668 IX86_BUILTIN_MASKLOADPD256,
24669 IX86_BUILTIN_MASKLOADPS256,
24670 IX86_BUILTIN_MASKSTOREPD256,
24671 IX86_BUILTIN_MASKSTOREPS256,
24672 IX86_BUILTIN_MOVSHDUP256,
24673 IX86_BUILTIN_MOVSLDUP256,
24674 IX86_BUILTIN_MOVDDUP256,
24675
24676 IX86_BUILTIN_SQRTPD256,
24677 IX86_BUILTIN_SQRTPS256,
24678 IX86_BUILTIN_SQRTPS_NR256,
24679 IX86_BUILTIN_RSQRTPS256,
24680 IX86_BUILTIN_RSQRTPS_NR256,
24681
24682 IX86_BUILTIN_RCPPS256,
24683
24684 IX86_BUILTIN_ROUNDPD256,
24685 IX86_BUILTIN_ROUNDPS256,
24686
24687 IX86_BUILTIN_FLOORPD256,
24688 IX86_BUILTIN_CEILPD256,
24689 IX86_BUILTIN_TRUNCPD256,
24690 IX86_BUILTIN_RINTPD256,
24691 IX86_BUILTIN_ROUNDPD_AZ256,
24692 IX86_BUILTIN_FLOORPS256,
24693 IX86_BUILTIN_CEILPS256,
24694 IX86_BUILTIN_TRUNCPS256,
24695 IX86_BUILTIN_RINTPS256,
24696 IX86_BUILTIN_ROUNDPS_AZ256,
24697
24698 IX86_BUILTIN_UNPCKHPD256,
24699 IX86_BUILTIN_UNPCKLPD256,
24700 IX86_BUILTIN_UNPCKHPS256,
24701 IX86_BUILTIN_UNPCKLPS256,
24702
24703 IX86_BUILTIN_SI256_SI,
24704 IX86_BUILTIN_PS256_PS,
24705 IX86_BUILTIN_PD256_PD,
24706 IX86_BUILTIN_SI_SI256,
24707 IX86_BUILTIN_PS_PS256,
24708 IX86_BUILTIN_PD_PD256,
24709
24710 IX86_BUILTIN_VTESTZPD,
24711 IX86_BUILTIN_VTESTCPD,
24712 IX86_BUILTIN_VTESTNZCPD,
24713 IX86_BUILTIN_VTESTZPS,
24714 IX86_BUILTIN_VTESTCPS,
24715 IX86_BUILTIN_VTESTNZCPS,
24716 IX86_BUILTIN_VTESTZPD256,
24717 IX86_BUILTIN_VTESTCPD256,
24718 IX86_BUILTIN_VTESTNZCPD256,
24719 IX86_BUILTIN_VTESTZPS256,
24720 IX86_BUILTIN_VTESTCPS256,
24721 IX86_BUILTIN_VTESTNZCPS256,
24722 IX86_BUILTIN_PTESTZ256,
24723 IX86_BUILTIN_PTESTC256,
24724 IX86_BUILTIN_PTESTNZC256,
24725
24726 IX86_BUILTIN_MOVMSKPD256,
24727 IX86_BUILTIN_MOVMSKPS256,
24728
24729 /* AVX2 */
24730 IX86_BUILTIN_MPSADBW256,
24731 IX86_BUILTIN_PABSB256,
24732 IX86_BUILTIN_PABSW256,
24733 IX86_BUILTIN_PABSD256,
24734 IX86_BUILTIN_PACKSSDW256,
24735 IX86_BUILTIN_PACKSSWB256,
24736 IX86_BUILTIN_PACKUSDW256,
24737 IX86_BUILTIN_PACKUSWB256,
24738 IX86_BUILTIN_PADDB256,
24739 IX86_BUILTIN_PADDW256,
24740 IX86_BUILTIN_PADDD256,
24741 IX86_BUILTIN_PADDQ256,
24742 IX86_BUILTIN_PADDSB256,
24743 IX86_BUILTIN_PADDSW256,
24744 IX86_BUILTIN_PADDUSB256,
24745 IX86_BUILTIN_PADDUSW256,
24746 IX86_BUILTIN_PALIGNR256,
24747 IX86_BUILTIN_AND256I,
24748 IX86_BUILTIN_ANDNOT256I,
24749 IX86_BUILTIN_PAVGB256,
24750 IX86_BUILTIN_PAVGW256,
24751 IX86_BUILTIN_PBLENDVB256,
24752 IX86_BUILTIN_PBLENDVW256,
24753 IX86_BUILTIN_PCMPEQB256,
24754 IX86_BUILTIN_PCMPEQW256,
24755 IX86_BUILTIN_PCMPEQD256,
24756 IX86_BUILTIN_PCMPEQQ256,
24757 IX86_BUILTIN_PCMPGTB256,
24758 IX86_BUILTIN_PCMPGTW256,
24759 IX86_BUILTIN_PCMPGTD256,
24760 IX86_BUILTIN_PCMPGTQ256,
24761 IX86_BUILTIN_PHADDW256,
24762 IX86_BUILTIN_PHADDD256,
24763 IX86_BUILTIN_PHADDSW256,
24764 IX86_BUILTIN_PHSUBW256,
24765 IX86_BUILTIN_PHSUBD256,
24766 IX86_BUILTIN_PHSUBSW256,
24767 IX86_BUILTIN_PMADDUBSW256,
24768 IX86_BUILTIN_PMADDWD256,
24769 IX86_BUILTIN_PMAXSB256,
24770 IX86_BUILTIN_PMAXSW256,
24771 IX86_BUILTIN_PMAXSD256,
24772 IX86_BUILTIN_PMAXUB256,
24773 IX86_BUILTIN_PMAXUW256,
24774 IX86_BUILTIN_PMAXUD256,
24775 IX86_BUILTIN_PMINSB256,
24776 IX86_BUILTIN_PMINSW256,
24777 IX86_BUILTIN_PMINSD256,
24778 IX86_BUILTIN_PMINUB256,
24779 IX86_BUILTIN_PMINUW256,
24780 IX86_BUILTIN_PMINUD256,
24781 IX86_BUILTIN_PMOVMSKB256,
24782 IX86_BUILTIN_PMOVSXBW256,
24783 IX86_BUILTIN_PMOVSXBD256,
24784 IX86_BUILTIN_PMOVSXBQ256,
24785 IX86_BUILTIN_PMOVSXWD256,
24786 IX86_BUILTIN_PMOVSXWQ256,
24787 IX86_BUILTIN_PMOVSXDQ256,
24788 IX86_BUILTIN_PMOVZXBW256,
24789 IX86_BUILTIN_PMOVZXBD256,
24790 IX86_BUILTIN_PMOVZXBQ256,
24791 IX86_BUILTIN_PMOVZXWD256,
24792 IX86_BUILTIN_PMOVZXWQ256,
24793 IX86_BUILTIN_PMOVZXDQ256,
24794 IX86_BUILTIN_PMULDQ256,
24795 IX86_BUILTIN_PMULHRSW256,
24796 IX86_BUILTIN_PMULHUW256,
24797 IX86_BUILTIN_PMULHW256,
24798 IX86_BUILTIN_PMULLW256,
24799 IX86_BUILTIN_PMULLD256,
24800 IX86_BUILTIN_PMULUDQ256,
24801 IX86_BUILTIN_POR256,
24802 IX86_BUILTIN_PSADBW256,
24803 IX86_BUILTIN_PSHUFB256,
24804 IX86_BUILTIN_PSHUFD256,
24805 IX86_BUILTIN_PSHUFHW256,
24806 IX86_BUILTIN_PSHUFLW256,
24807 IX86_BUILTIN_PSIGNB256,
24808 IX86_BUILTIN_PSIGNW256,
24809 IX86_BUILTIN_PSIGND256,
24810 IX86_BUILTIN_PSLLDQI256,
24811 IX86_BUILTIN_PSLLWI256,
24812 IX86_BUILTIN_PSLLW256,
24813 IX86_BUILTIN_PSLLDI256,
24814 IX86_BUILTIN_PSLLD256,
24815 IX86_BUILTIN_PSLLQI256,
24816 IX86_BUILTIN_PSLLQ256,
24817 IX86_BUILTIN_PSRAWI256,
24818 IX86_BUILTIN_PSRAW256,
24819 IX86_BUILTIN_PSRADI256,
24820 IX86_BUILTIN_PSRAD256,
24821 IX86_BUILTIN_PSRLDQI256,
24822 IX86_BUILTIN_PSRLWI256,
24823 IX86_BUILTIN_PSRLW256,
24824 IX86_BUILTIN_PSRLDI256,
24825 IX86_BUILTIN_PSRLD256,
24826 IX86_BUILTIN_PSRLQI256,
24827 IX86_BUILTIN_PSRLQ256,
24828 IX86_BUILTIN_PSUBB256,
24829 IX86_BUILTIN_PSUBW256,
24830 IX86_BUILTIN_PSUBD256,
24831 IX86_BUILTIN_PSUBQ256,
24832 IX86_BUILTIN_PSUBSB256,
24833 IX86_BUILTIN_PSUBSW256,
24834 IX86_BUILTIN_PSUBUSB256,
24835 IX86_BUILTIN_PSUBUSW256,
24836 IX86_BUILTIN_PUNPCKHBW256,
24837 IX86_BUILTIN_PUNPCKHWD256,
24838 IX86_BUILTIN_PUNPCKHDQ256,
24839 IX86_BUILTIN_PUNPCKHQDQ256,
24840 IX86_BUILTIN_PUNPCKLBW256,
24841 IX86_BUILTIN_PUNPCKLWD256,
24842 IX86_BUILTIN_PUNPCKLDQ256,
24843 IX86_BUILTIN_PUNPCKLQDQ256,
24844 IX86_BUILTIN_PXOR256,
24845 IX86_BUILTIN_MOVNTDQA256,
24846 IX86_BUILTIN_VBROADCASTSS_PS,
24847 IX86_BUILTIN_VBROADCASTSS_PS256,
24848 IX86_BUILTIN_VBROADCASTSD_PD256,
24849 IX86_BUILTIN_VBROADCASTSI256,
24850 IX86_BUILTIN_PBLENDD256,
24851 IX86_BUILTIN_PBLENDD128,
24852 IX86_BUILTIN_PBROADCASTB256,
24853 IX86_BUILTIN_PBROADCASTW256,
24854 IX86_BUILTIN_PBROADCASTD256,
24855 IX86_BUILTIN_PBROADCASTQ256,
24856 IX86_BUILTIN_PBROADCASTB128,
24857 IX86_BUILTIN_PBROADCASTW128,
24858 IX86_BUILTIN_PBROADCASTD128,
24859 IX86_BUILTIN_PBROADCASTQ128,
24860 IX86_BUILTIN_VPERMVARSI256,
24861 IX86_BUILTIN_VPERMDF256,
24862 IX86_BUILTIN_VPERMVARSF256,
24863 IX86_BUILTIN_VPERMDI256,
24864 IX86_BUILTIN_VPERMTI256,
24865 IX86_BUILTIN_VEXTRACT128I256,
24866 IX86_BUILTIN_VINSERT128I256,
24867 IX86_BUILTIN_MASKLOADD,
24868 IX86_BUILTIN_MASKLOADQ,
24869 IX86_BUILTIN_MASKLOADD256,
24870 IX86_BUILTIN_MASKLOADQ256,
24871 IX86_BUILTIN_MASKSTORED,
24872 IX86_BUILTIN_MASKSTOREQ,
24873 IX86_BUILTIN_MASKSTORED256,
24874 IX86_BUILTIN_MASKSTOREQ256,
24875 IX86_BUILTIN_PSLLVV4DI,
24876 IX86_BUILTIN_PSLLVV2DI,
24877 IX86_BUILTIN_PSLLVV8SI,
24878 IX86_BUILTIN_PSLLVV4SI,
24879 IX86_BUILTIN_PSRAVV8SI,
24880 IX86_BUILTIN_PSRAVV4SI,
24881 IX86_BUILTIN_PSRLVV4DI,
24882 IX86_BUILTIN_PSRLVV2DI,
24883 IX86_BUILTIN_PSRLVV8SI,
24884 IX86_BUILTIN_PSRLVV4SI,
24885
24886 IX86_BUILTIN_GATHERSIV2DF,
24887 IX86_BUILTIN_GATHERSIV4DF,
24888 IX86_BUILTIN_GATHERDIV2DF,
24889 IX86_BUILTIN_GATHERDIV4DF,
24890 IX86_BUILTIN_GATHERSIV4SF,
24891 IX86_BUILTIN_GATHERSIV8SF,
24892 IX86_BUILTIN_GATHERDIV4SF,
24893 IX86_BUILTIN_GATHERDIV8SF,
24894 IX86_BUILTIN_GATHERSIV2DI,
24895 IX86_BUILTIN_GATHERSIV4DI,
24896 IX86_BUILTIN_GATHERDIV2DI,
24897 IX86_BUILTIN_GATHERDIV4DI,
24898 IX86_BUILTIN_GATHERSIV4SI,
24899 IX86_BUILTIN_GATHERSIV8SI,
24900 IX86_BUILTIN_GATHERDIV4SI,
24901 IX86_BUILTIN_GATHERDIV8SI,
24902
24903 /* TFmode support builtins. */
24904 IX86_BUILTIN_INFQ,
24905 IX86_BUILTIN_HUGE_VALQ,
24906 IX86_BUILTIN_FABSQ,
24907 IX86_BUILTIN_COPYSIGNQ,
24908
24909 /* Vectorizer support builtins. */
24910 IX86_BUILTIN_CPYSGNPS,
24911 IX86_BUILTIN_CPYSGNPD,
24912 IX86_BUILTIN_CPYSGNPS256,
24913 IX86_BUILTIN_CPYSGNPD256,
24914
24915 IX86_BUILTIN_CVTUDQ2PS,
24916
24917 IX86_BUILTIN_VEC_PERM_V2DF,
24918 IX86_BUILTIN_VEC_PERM_V4SF,
24919 IX86_BUILTIN_VEC_PERM_V2DI,
24920 IX86_BUILTIN_VEC_PERM_V4SI,
24921 IX86_BUILTIN_VEC_PERM_V8HI,
24922 IX86_BUILTIN_VEC_PERM_V16QI,
24923 IX86_BUILTIN_VEC_PERM_V2DI_U,
24924 IX86_BUILTIN_VEC_PERM_V4SI_U,
24925 IX86_BUILTIN_VEC_PERM_V8HI_U,
24926 IX86_BUILTIN_VEC_PERM_V16QI_U,
24927 IX86_BUILTIN_VEC_PERM_V4DF,
24928 IX86_BUILTIN_VEC_PERM_V8SF,
24929
24930 /* FMA4 instructions. */
24931 IX86_BUILTIN_VFMADDSS,
24932 IX86_BUILTIN_VFMADDSD,
24933 IX86_BUILTIN_VFMADDPS,
24934 IX86_BUILTIN_VFMADDPD,
24935 IX86_BUILTIN_VFMADDPS256,
24936 IX86_BUILTIN_VFMADDPD256,
24937 IX86_BUILTIN_VFMADDSUBPS,
24938 IX86_BUILTIN_VFMADDSUBPD,
24939 IX86_BUILTIN_VFMADDSUBPS256,
24940 IX86_BUILTIN_VFMADDSUBPD256,
24941
24942 /* FMA3 instructions. */
24943 IX86_BUILTIN_VFMADDSS3,
24944 IX86_BUILTIN_VFMADDSD3,
24945
24946 /* XOP instructions. */
24947 IX86_BUILTIN_VPCMOV,
24948 IX86_BUILTIN_VPCMOV_V2DI,
24949 IX86_BUILTIN_VPCMOV_V4SI,
24950 IX86_BUILTIN_VPCMOV_V8HI,
24951 IX86_BUILTIN_VPCMOV_V16QI,
24952 IX86_BUILTIN_VPCMOV_V4SF,
24953 IX86_BUILTIN_VPCMOV_V2DF,
24954 IX86_BUILTIN_VPCMOV256,
24955 IX86_BUILTIN_VPCMOV_V4DI256,
24956 IX86_BUILTIN_VPCMOV_V8SI256,
24957 IX86_BUILTIN_VPCMOV_V16HI256,
24958 IX86_BUILTIN_VPCMOV_V32QI256,
24959 IX86_BUILTIN_VPCMOV_V8SF256,
24960 IX86_BUILTIN_VPCMOV_V4DF256,
24961
24962 IX86_BUILTIN_VPPERM,
24963
24964 IX86_BUILTIN_VPMACSSWW,
24965 IX86_BUILTIN_VPMACSWW,
24966 IX86_BUILTIN_VPMACSSWD,
24967 IX86_BUILTIN_VPMACSWD,
24968 IX86_BUILTIN_VPMACSSDD,
24969 IX86_BUILTIN_VPMACSDD,
24970 IX86_BUILTIN_VPMACSSDQL,
24971 IX86_BUILTIN_VPMACSSDQH,
24972 IX86_BUILTIN_VPMACSDQL,
24973 IX86_BUILTIN_VPMACSDQH,
24974 IX86_BUILTIN_VPMADCSSWD,
24975 IX86_BUILTIN_VPMADCSWD,
24976
24977 IX86_BUILTIN_VPHADDBW,
24978 IX86_BUILTIN_VPHADDBD,
24979 IX86_BUILTIN_VPHADDBQ,
24980 IX86_BUILTIN_VPHADDWD,
24981 IX86_BUILTIN_VPHADDWQ,
24982 IX86_BUILTIN_VPHADDDQ,
24983 IX86_BUILTIN_VPHADDUBW,
24984 IX86_BUILTIN_VPHADDUBD,
24985 IX86_BUILTIN_VPHADDUBQ,
24986 IX86_BUILTIN_VPHADDUWD,
24987 IX86_BUILTIN_VPHADDUWQ,
24988 IX86_BUILTIN_VPHADDUDQ,
24989 IX86_BUILTIN_VPHSUBBW,
24990 IX86_BUILTIN_VPHSUBWD,
24991 IX86_BUILTIN_VPHSUBDQ,
24992
24993 IX86_BUILTIN_VPROTB,
24994 IX86_BUILTIN_VPROTW,
24995 IX86_BUILTIN_VPROTD,
24996 IX86_BUILTIN_VPROTQ,
24997 IX86_BUILTIN_VPROTB_IMM,
24998 IX86_BUILTIN_VPROTW_IMM,
24999 IX86_BUILTIN_VPROTD_IMM,
25000 IX86_BUILTIN_VPROTQ_IMM,
25001
25002 IX86_BUILTIN_VPSHLB,
25003 IX86_BUILTIN_VPSHLW,
25004 IX86_BUILTIN_VPSHLD,
25005 IX86_BUILTIN_VPSHLQ,
25006 IX86_BUILTIN_VPSHAB,
25007 IX86_BUILTIN_VPSHAW,
25008 IX86_BUILTIN_VPSHAD,
25009 IX86_BUILTIN_VPSHAQ,
25010
25011 IX86_BUILTIN_VFRCZSS,
25012 IX86_BUILTIN_VFRCZSD,
25013 IX86_BUILTIN_VFRCZPS,
25014 IX86_BUILTIN_VFRCZPD,
25015 IX86_BUILTIN_VFRCZPS256,
25016 IX86_BUILTIN_VFRCZPD256,
25017
25018 IX86_BUILTIN_VPCOMEQUB,
25019 IX86_BUILTIN_VPCOMNEUB,
25020 IX86_BUILTIN_VPCOMLTUB,
25021 IX86_BUILTIN_VPCOMLEUB,
25022 IX86_BUILTIN_VPCOMGTUB,
25023 IX86_BUILTIN_VPCOMGEUB,
25024 IX86_BUILTIN_VPCOMFALSEUB,
25025 IX86_BUILTIN_VPCOMTRUEUB,
25026
25027 IX86_BUILTIN_VPCOMEQUW,
25028 IX86_BUILTIN_VPCOMNEUW,
25029 IX86_BUILTIN_VPCOMLTUW,
25030 IX86_BUILTIN_VPCOMLEUW,
25031 IX86_BUILTIN_VPCOMGTUW,
25032 IX86_BUILTIN_VPCOMGEUW,
25033 IX86_BUILTIN_VPCOMFALSEUW,
25034 IX86_BUILTIN_VPCOMTRUEUW,
25035
25036 IX86_BUILTIN_VPCOMEQUD,
25037 IX86_BUILTIN_VPCOMNEUD,
25038 IX86_BUILTIN_VPCOMLTUD,
25039 IX86_BUILTIN_VPCOMLEUD,
25040 IX86_BUILTIN_VPCOMGTUD,
25041 IX86_BUILTIN_VPCOMGEUD,
25042 IX86_BUILTIN_VPCOMFALSEUD,
25043 IX86_BUILTIN_VPCOMTRUEUD,
25044
25045 IX86_BUILTIN_VPCOMEQUQ,
25046 IX86_BUILTIN_VPCOMNEUQ,
25047 IX86_BUILTIN_VPCOMLTUQ,
25048 IX86_BUILTIN_VPCOMLEUQ,
25049 IX86_BUILTIN_VPCOMGTUQ,
25050 IX86_BUILTIN_VPCOMGEUQ,
25051 IX86_BUILTIN_VPCOMFALSEUQ,
25052 IX86_BUILTIN_VPCOMTRUEUQ,
25053
25054 IX86_BUILTIN_VPCOMEQB,
25055 IX86_BUILTIN_VPCOMNEB,
25056 IX86_BUILTIN_VPCOMLTB,
25057 IX86_BUILTIN_VPCOMLEB,
25058 IX86_BUILTIN_VPCOMGTB,
25059 IX86_BUILTIN_VPCOMGEB,
25060 IX86_BUILTIN_VPCOMFALSEB,
25061 IX86_BUILTIN_VPCOMTRUEB,
25062
25063 IX86_BUILTIN_VPCOMEQW,
25064 IX86_BUILTIN_VPCOMNEW,
25065 IX86_BUILTIN_VPCOMLTW,
25066 IX86_BUILTIN_VPCOMLEW,
25067 IX86_BUILTIN_VPCOMGTW,
25068 IX86_BUILTIN_VPCOMGEW,
25069 IX86_BUILTIN_VPCOMFALSEW,
25070 IX86_BUILTIN_VPCOMTRUEW,
25071
25072 IX86_BUILTIN_VPCOMEQD,
25073 IX86_BUILTIN_VPCOMNED,
25074 IX86_BUILTIN_VPCOMLTD,
25075 IX86_BUILTIN_VPCOMLED,
25076 IX86_BUILTIN_VPCOMGTD,
25077 IX86_BUILTIN_VPCOMGED,
25078 IX86_BUILTIN_VPCOMFALSED,
25079 IX86_BUILTIN_VPCOMTRUED,
25080
25081 IX86_BUILTIN_VPCOMEQQ,
25082 IX86_BUILTIN_VPCOMNEQ,
25083 IX86_BUILTIN_VPCOMLTQ,
25084 IX86_BUILTIN_VPCOMLEQ,
25085 IX86_BUILTIN_VPCOMGTQ,
25086 IX86_BUILTIN_VPCOMGEQ,
25087 IX86_BUILTIN_VPCOMFALSEQ,
25088 IX86_BUILTIN_VPCOMTRUEQ,
25089
25090 /* LWP instructions. */
25091 IX86_BUILTIN_LLWPCB,
25092 IX86_BUILTIN_SLWPCB,
25093 IX86_BUILTIN_LWPVAL32,
25094 IX86_BUILTIN_LWPVAL64,
25095 IX86_BUILTIN_LWPINS32,
25096 IX86_BUILTIN_LWPINS64,
25097
25098 IX86_BUILTIN_CLZS,
25099
25100 /* BMI instructions. */
25101 IX86_BUILTIN_BEXTR32,
25102 IX86_BUILTIN_BEXTR64,
25103 IX86_BUILTIN_CTZS,
25104
25105 /* TBM instructions. */
25106 IX86_BUILTIN_BEXTRI32,
25107 IX86_BUILTIN_BEXTRI64,
25108
25109 /* BMI2 instructions. */
25110 IX86_BUILTIN_BZHI32,
25111 IX86_BUILTIN_BZHI64,
25112 IX86_BUILTIN_PDEP32,
25113 IX86_BUILTIN_PDEP64,
25114 IX86_BUILTIN_PEXT32,
25115 IX86_BUILTIN_PEXT64,
25116
25117 /* FSGSBASE instructions. */
25118 IX86_BUILTIN_RDFSBASE32,
25119 IX86_BUILTIN_RDFSBASE64,
25120 IX86_BUILTIN_RDGSBASE32,
25121 IX86_BUILTIN_RDGSBASE64,
25122 IX86_BUILTIN_WRFSBASE32,
25123 IX86_BUILTIN_WRFSBASE64,
25124 IX86_BUILTIN_WRGSBASE32,
25125 IX86_BUILTIN_WRGSBASE64,
25126
25127 /* RDRND instructions. */
25128 IX86_BUILTIN_RDRAND16_STEP,
25129 IX86_BUILTIN_RDRAND32_STEP,
25130 IX86_BUILTIN_RDRAND64_STEP,
25131
25132 /* F16C instructions. */
25133 IX86_BUILTIN_CVTPH2PS,
25134 IX86_BUILTIN_CVTPH2PS256,
25135 IX86_BUILTIN_CVTPS2PH,
25136 IX86_BUILTIN_CVTPS2PH256,
25137
25138 /* CFString built-in for darwin */
25139 IX86_BUILTIN_CFSTRING,
25140
25141 IX86_BUILTIN_MAX
25142 };
25143
25144 /* Table for the ix86 builtin decls. */
25145 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
25146
25147 /* Table of all of the builtin functions that are possible with different
25148 ISAs but are waiting to be built until a function is declared to use
25149 that ISA. */
25150 struct builtin_isa {
25151 const char *name; /* function name */
25152 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
25153 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
25154 bool const_p; /* true if the declaration is constant */
25155 bool set_and_not_built_p;
25156 };
25157
25158 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
25159
25160
25161 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the
25162 MASK of isa_flags in the ix86_builtins_isa array. Store the function
25163 decl in the ix86_builtins array. Return the function decl, or NULL_TREE
25164 if the builtin was not added.
25165
25166 If the front end has a special hook for builtin functions, delay adding
25167 builtin functions that aren't in the current ISA until the ISA is changed
25168 with function specific optimization. Doing so can save about 300K for the
25169 default compiler. When the builtin is expanded, check at that time whether
25170 it is valid.
25171
25172 If the front end doesn't have a special hook, record all builtins, even
25173 those that aren't in the current ISA, in case the user uses function
25174 specific options for a different ISA, so that we don't get scope errors
25175 if a builtin is added in the middle of a function scope. */
25176
25177 static inline tree
25178 def_builtin (HOST_WIDE_INT mask, const char *name,
25179 enum ix86_builtin_func_type tcode,
25180 enum ix86_builtins code)
25181 {
25182 tree decl = NULL_TREE;
25183
25184 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
25185 {
25186 ix86_builtins_isa[(int) code].isa = mask;
25187
25188 mask &= ~OPTION_MASK_ISA_64BIT;
25189 if (mask == 0
25190 || (mask & ix86_isa_flags) != 0
25191 || (lang_hooks.builtin_function
25192 == lang_hooks.builtin_function_ext_scope))
25193
25194 {
25195 tree type = ix86_get_builtin_func_type (tcode);
25196 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
25197 NULL, NULL_TREE);
25198 ix86_builtins[(int) code] = decl;
25199 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
25200 }
25201 else
25202 {
25203 ix86_builtins[(int) code] = NULL_TREE;
25204 ix86_builtins_isa[(int) code].tcode = tcode;
25205 ix86_builtins_isa[(int) code].name = name;
25206 ix86_builtins_isa[(int) code].const_p = false;
25207 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
25208 }
25209 }
25210
25211 return decl;
25212 }
25213
25214 /* Like def_builtin, but also marks the function decl "const". */
25215
25216 static inline tree
25217 def_builtin_const (HOST_WIDE_INT mask, const char *name,
25218 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
25219 {
25220 tree decl = def_builtin (mask, name, tcode, code);
25221 if (decl)
25222 TREE_READONLY (decl) = 1;
25223 else
25224 ix86_builtins_isa[(int) code].const_p = true;
25225
25226 return decl;
25227 }
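/* Illustrative note (not in the original source): a typical use later in
   this file looks roughly like

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd",
			V2DF_FTYPE_V2DF, IX86_BUILTIN_SQRTPD);

   i.e. an ISA mask, the user-visible builtin name, a function-type code
   from i386-builtin-types.def and the IX86_BUILTIN_* enumerator.  The
   decl is either created immediately or deferred until
   ix86_add_new_builtins runs for the requested ISA, as described
   above.  */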
25228
25229 /* Add any new builtin functions for a given ISA that may not have been
25230 declared. This saves a bit of space compared to adding all of the
25231 declarations to the tree, even if we didn't use them. */
25232
25233 static void
25234 ix86_add_new_builtins (HOST_WIDE_INT isa)
25235 {
25236 int i;
25237
25238 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
25239 {
25240 if ((ix86_builtins_isa[i].isa & isa) != 0
25241 && ix86_builtins_isa[i].set_and_not_built_p)
25242 {
25243 tree decl, type;
25244
25245 /* Don't define the builtin again. */
25246 ix86_builtins_isa[i].set_and_not_built_p = false;
25247
25248 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
25249 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
25250 type, i, BUILT_IN_MD, NULL,
25251 NULL_TREE);
25252
25253 ix86_builtins[i] = decl;
25254 if (ix86_builtins_isa[i].const_p)
25255 TREE_READONLY (decl) = 1;
25256 }
25257 }
25258 }
25259
25260 /* Bits for builtin_description.flag. */
25261
25262 /* Set when we don't support the comparison natively, and should
25263 swap_comparison in order to support it. */
25264 #define BUILTIN_DESC_SWAP_OPERANDS 1
25265
25266 struct builtin_description
25267 {
25268 const HOST_WIDE_INT mask;
25269 const enum insn_code icode;
25270 const char *const name;
25271 const enum ix86_builtins code;
25272 const enum rtx_code comparison;
25273 const int flag;
25274 };
25275
25276 static const struct builtin_description bdesc_comi[] =
25277 {
25278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
25279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
25280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
25281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
25282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
25283 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
25284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
25285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
25286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
25287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
25288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
25289 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
25290 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
25291 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
25292 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
25293 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
25294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
25295 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
25296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
25297 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
25298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
25299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
25300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
25301 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
25302 };
25303
25304 static const struct builtin_description bdesc_pcmpestr[] =
25305 {
25306 /* SSE4.2 */
25307 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
25308 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
25309 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
25310 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
25311 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
25312 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
25313 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
25314 };
25315
25316 static const struct builtin_description bdesc_pcmpistr[] =
25317 {
25318 /* SSE4.2 */
25319 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
25320 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
25321 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
25322 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
25323 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
25324 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
25325 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
25326 };
25327
25328 /* Special builtins with a variable number of arguments. */
25329 static const struct builtin_description bdesc_special_args[] =
25330 {
25331 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
25332 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
25333 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
25334
25335 /* MMX */
25336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
25337
25338 /* 3DNow! */
25339 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
25340
25341 /* SSE */
25342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25344 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
25345
25346 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
25347 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
25348 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
25349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
25350
25351 /* SSE or 3DNow!A */
25352 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25353 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
25354
25355 /* SSE2 */
25356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
25360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
25362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
25363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
25364 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
25365
25366 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
25367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
25368
25369 /* SSE3 */
25370 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
25371
25372 /* SSE4.1 */
25373 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
25374
25375 /* SSE4A */
25376 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25377 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25378
25379 /* AVX */
25380 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
25381 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
25382
25383 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
25384 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
25385 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
25386 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
25387 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
25388
25389 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
25390 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
25391 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
25392 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
25393 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
25394 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
25395 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
25396
25397 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
25398 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
25399 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
25400
25401 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
25402 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
25403 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
25404 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
25405 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
25406 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
25407 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
25408 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
25409
25410 /* AVX2 */
25411 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
25412 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
25413 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
25414 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
25415 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
25416 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
25417 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
25418 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
25419 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
25420
25421 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
25422 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
25423 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
25424 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
25425 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
25426 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
25427
25428 /* FSGSBASE */
25429 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
25430 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
25431 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
25432 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
25433 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
25434 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
25435 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
25436 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
25437 };
25438
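/* Each entry in these builtin_description tables follows the same layout
   as the entries above: an ISA option mask, the insn code implementing
   the builtin, the user-visible __builtin_ia32_* name, the IX86_BUILTIN_*
   enumerator, an rtx comparison code (UNKNOWN when the pattern is not a
   compare), and the function-type enumerator cast to int.  As an
   illustrative reading of one existing entry (not a new table entry),

     { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
       IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }

   makes __builtin_ia32_addps available under -msse, expands it through
   the addv4sf3 pattern, and gives it the V4SF (V4SF, V4SF) prototype.
   The compare entries below reuse a single masked-compare pattern and
   encode the predicate in the comparison field; a _SWAP suffix on the
   function type (e.g. __builtin_ia32_cmpgtps, which uses LT) means the
   operands are swapped before the insn is emitted.  */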
25439 /* Builtins with variable number of arguments. */
25440 static const struct builtin_description bdesc_args[] =
25441 {
25442 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
25443 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
25444 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
25445 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
25446 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
25447 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
25448 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
25449
25450 /* MMX */
25451 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25452 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25453 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25454 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25455 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25456 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25457
25458 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25459 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25460 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25461 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25462 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25463 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25464 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25465 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25466
25467 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25468 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25469
25470 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25471 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25472 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25473 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25474
25475 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25476 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25477 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25478 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25479 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25480 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25481
25482 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25483 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25484 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25485 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25486 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25487 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25488
25489 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
25490 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
25491 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
25492
25493 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
25494
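/* For the shift builtins below, the COUNT suffix marks the last operand
   as a shift count: the _SI_COUNT forms (the *i variants such as
   __builtin_ia32_psllwi) take the count as a plain integer, while the
   vector _COUNT forms (e.g. V4HI_FTYPE_V4HI_V4HI_COUNT) take the count
   in an MMX register.  */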
25495 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25496 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25497 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
25498 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25499 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
25500 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
25501
25502 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25503 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25504 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
25505 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25506 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
25507 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
25508
25509 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25510 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25511 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25512 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
25513
25514 /* 3DNow! */
25515 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
25516 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
25517 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
25518 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
25519
25520 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25521 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25522 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25523 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
25524 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
25525 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
25526 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25527 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25528 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25529 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25530 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25531 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25532 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25533 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25534 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25535
25536 /* 3DNow!A */
25537 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
25538 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
25539 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
25540 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
25541 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25542 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25543
25544 /* SSE */
25545 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
25546 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25547 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25548 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25549 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25550 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25551 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
25552 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
25553 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
25554 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
25555 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
25556 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
25557
25558 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25559
25560 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25561 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25562 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25563 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25564 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25565 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25566 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25567 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25568
25569 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
25570 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
25571 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
25572 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25573 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25574 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25575 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
25576 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
25577 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
25578 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25579 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25580 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25581 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
25582 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
25583 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
25584 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25585 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
25586 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
25587 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
25588 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25589 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25590 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25591
25592 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25593 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25594 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25595 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25596
25597 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25598 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25599 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25600 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25601
25602 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25603
25604 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25605 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25606 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25607 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25608 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25609
25610 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
25611 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
25612 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
25613
25614 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
25615
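/* The _VEC_MERGE function type below is used for scalar operations that
   take a single V4SF argument: the operation is applied to the low
   element and the remaining elements of the result are carried over from
   the same input, matching the sqrtss/rsqrtss/rcpss semantics.  */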
25616 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
25617 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
25618 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
25619
25620 /* SSE MMX or 3DNow!A */
25621 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25622 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25623 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25624
25625 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25626 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25627 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25628 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25629
25630 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
25631 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
25632
25633 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
25634
25635 /* SSE2 */
25636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25637
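/* The __builtin_ia32_vec_perm_* entries below use CODE_FOR_nothing: they
   are not tied to one named insn pattern and are handled specially at
   expansion time rather than through the generic argument-expansion path
   driven by the insn code field.  */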
25638 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
25639 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
25640 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
25641 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
25642 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
25643 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
25644 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
25645 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
25646 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
25647 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
25648 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
25649 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
25650
25651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
25652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
25653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
25654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
25655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
25656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
25657
25658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
25659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
25660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
25661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
25662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
25663
25664 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
25665
25666 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
25667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
25668 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
25669 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
25670
25671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
25672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
25673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
25674
25675 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25676 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25677 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25678 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25679 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25680 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25683
25684 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
25685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
25686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
25687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25689 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
25691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
25692 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
25693 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25694 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25696 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
25697 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
25698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
25699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25700 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
25701 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
25702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
25703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25704
25705 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25706 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25707 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25708 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25709
25710 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25712 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25713 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25714
25715 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25716
25717 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25718 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25719 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25720
25721 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
25722
25723 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25724 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25725 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25726 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25727 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25728 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25729 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25730 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25731
25732 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25733 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25734 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25735 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25736 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25737 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25738 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25739 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25740
25741 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25742 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25743
25744 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25745 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25746 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25747 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25748
25749 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25750 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25751
25752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25753 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25758
25759 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25760 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25761 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25762 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25763
25764 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25765 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25766 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25767 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25768 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25769 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25770 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25771 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25772
25773 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
25774 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
25775 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
25776
25777 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25778 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
25779
25780 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
25781 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
25782
25783 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
25784
25785 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
25786 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
25787 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
25788 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
25789
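/* The whole-register byte shifts below (pslldq/psrldq) are implemented by
   V1TI shift patterns; the _INT_CONVERT function type marks entries whose
   V2DI builtin operands are converted to the insn's mode when the builtin
   is expanded.  */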
25790 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25791 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25792 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25793 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25794 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25795 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25796 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25797
25798 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25799 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25800 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25801 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25802 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25803 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25804 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25805
25806 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25807 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25808 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25809 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25810
25811 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
25812 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25813 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25814
25815 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
25816
25817 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
25818 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
25819
25820 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25821
25822 /* SSE2 MMX */
25823 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
25824 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
25825
25826 /* SSE3 */
25827 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25828 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25829
25830 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25831 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25832 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25833 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25834 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25835 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25836
25837 /* SSSE3 */
25838 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
25839 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
25840 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25841 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
25842 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
25843 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
25844
25845 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25846 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25847 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25848 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25849 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25850 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25851 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25852 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25853 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25854 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25855 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25856 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25857 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
25858 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
25859 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25860 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25861 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25862 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25863 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25864 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25865 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25866 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25867 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25868 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25869
25870 /* SSSE3 palignr, which takes an extra immediate byte count (the _INT_CONVERT types).  */
25871 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
25872 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
25873
25874 /* SSE4.1 */
25875 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25876 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25877 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
25878 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
25879 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25880 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25881 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25882 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
25883 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
25884 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
25885
25886 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
25887 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
25888 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
25889 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
25890 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
25891 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
25892 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
25893 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
25894 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
25895 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
25896 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
25897 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
25898 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25899
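/* For illustration: each row binds one __builtin_ia32_* name to an insn
   pattern and a function-type enum.  The sign/zero-extension builtins above
   are normally reached through the SSE4.1 wrappers in smmintrin.h; a minimal
   user-level sketch, assuming an -msse4.1 compilation:

       #include <smmintrin.h>

       __m128i widen_lo_bytes (__m128i v)
       {
         return _mm_cvtepi8_epi16 (v);   // wraps __builtin_ia32_pmovsxbw128
       }

   which matches V8HI_FTYPE_V16QI above: sixteen input bytes, the low eight
   sign-extended to 16-bit results.  */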
25900 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
25901 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25902 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25903 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25904 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25905 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25906 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25907 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25908 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25909 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25910 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
25911 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25912
25913 /* SSE4.1 rounding and ptest builtins, gated on OPTION_MASK_ISA_ROUND.  */
25914 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
25915 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
25916 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25917 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25918
25919 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
25920 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
25921 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
25922 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
25923
25924 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
25925
25926 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
25927 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
25928 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
25929 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
25930
25931 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25932
25933 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25934 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25935 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25936
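/* For illustration: the three ptest rows reuse a single pattern
   (CODE_FOR_sse4_1_ptest) and differ only in the comparison code in the
   fifth field: EQ, LTU and GTU select which flag of the PTEST result is
   returned (ZF for "testz", CF for "testc", neither for "testnzc").  A
   minimal sketch through the smmintrin.h wrappers, assuming -msse4.1:

       #include <smmintrin.h>

       int masked_bits_all_zero (__m128i mask, __m128i val)
       {
         return _mm_testz_si128 (mask, val);   // __builtin_ia32_ptestz128
       }
*/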
25937 /* SSE4.2 */
25938 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25939 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
25940 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
25941 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25942 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25943
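/* For illustration: the crc32 builtins accumulate a CRC-32C value one
   operand at a time, and the DImode form is additionally gated on
   OPTION_MASK_ISA_64BIT.  A minimal sketch through smmintrin.h, assuming
   -msse4.2:

       #include <smmintrin.h>

       unsigned int crc32c_byte (unsigned int crc, unsigned char b)
       {
         return _mm_crc32_u8 (crc, b);   // __builtin_ia32_crc32qi
       }
*/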
25944 /* SSE4A */
25945 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
25946 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
25947 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
25948 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25949
25950 /* AES */
25951 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
25952 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25953
25954 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25955 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25956 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25957 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25958
25959 /* PCLMUL */
25960 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
25961
25962 /* AVX */
25963 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25964 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25965 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25966 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25967 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25968 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25969 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25970 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25971 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25972 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25973 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25974 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25975 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25976 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25977 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25978 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25979 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25980 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25981 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25982 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25983 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25984 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25985 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25986 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25987 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25988 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25989
25990 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
25991 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
25992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
25993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
25994
25995 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25996 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
25998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
25999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
26000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
26001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
26002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
26003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
26004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
26005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
26006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
26007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
26008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
26009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
26010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
26011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
26012 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
26013 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
26014 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
26015 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
26016 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
26017 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
26018 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
26019 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
26020 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
26021 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
26022 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
26023 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
26024 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
26025 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
26026 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
26027 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
26028 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
26029
26030 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26031 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26032 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
26033
26034 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
26035 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26036 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26037 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26038 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26039
26040 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26041
26042 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
26043 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
26044
26045 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
26046 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
26047 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
26048 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
26049
26050 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
26051
26052 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
26053 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
26054 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
26055 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
26056
26057 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26058
26059 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26060 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26061 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26062 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26063
26064 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
26065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
26066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
26067 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
26068 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
26069 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
26070
26071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
26072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
26073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
26074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
26075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
26076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
26077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
26078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
26079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
26080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
26081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
26082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
26083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
26084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
26085 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
26086
26087 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
26088 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
26089
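/* For illustration: the movmsk rows pack the per-element sign bits into an
   ordinary int, as the INT_FTYPE_V4DF / INT_FTYPE_V8SF types indicate.  A
   minimal sketch through the avxintrin.h wrapper, assuming -mavx:

       #include <immintrin.h>

       int any_negative (__m256d v)
       {
         return _mm256_movemask_pd (v) != 0;   // __builtin_ia32_movmskpd256
       }
*/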
26090 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26091 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26092
26093 /* AVX2 */
26094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
26095 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
26096 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
26097 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
26098 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
26099 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
26100 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
26101 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
26102 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26103 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26104 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26105 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
26106 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26107 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26108 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26109 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26110 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv4di, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
26111 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
26112 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
26113 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26114 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26115 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
26116 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
26117 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26118 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26119 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26120 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
26121 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26122 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26123 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26124 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
26125 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26126 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26127 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26128 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26129 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26130 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26131 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
26132 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
26133 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26134 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26135 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26136 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26137 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26138 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26139 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26140 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26141 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26142 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26143 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26144 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26145 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
26146 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
26147 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
26148 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
26149 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
26150 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
26151 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
26152 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
26153 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
26154 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
26155 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
26156 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
26157 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
26158 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3 , "__builtin_ia32_pmuldq256" , IX86_BUILTIN_PMULDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
26159 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26160 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26161 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26162 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26163 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26164 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3 , "__builtin_ia32_pmuludq256" , IX86_BUILTIN_PMULUDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
26165 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
26166 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
26167 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26168 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
26169 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
26170 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
26171 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26172 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26173 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26174 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlqv4di3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
26175 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
26176 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
26177 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
26178 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
26179 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
26180 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
26181 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
26182 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
26183 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
26184 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
26185 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrqv4di3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
26186 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
26187 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
26188 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
26189 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
26190 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
26191 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
26192 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26193 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26194 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26195 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
26196 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26197 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26198 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26199 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26200 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26201 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26202 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26203 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
26204 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
26205 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
26206 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26207 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
26208 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
26209 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
26210 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
26211 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
26212 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
26213 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
26214 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
26215 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
26216 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
26217 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
26218 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
26219 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
26220 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
26221 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
26222 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
26223 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26224 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
26225 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26226 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
26227 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
26228 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
26229 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
26230 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
26231 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26232 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26233 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26234 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26235 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26236 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
26237 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26238 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
26239 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26240
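/* For illustration: the psllv/psrav/psrlv rows are the AVX2 per-element
   variable shifts, where each element is shifted by the corresponding
   element of the second operand instead of by one scalar count.  A minimal
   sketch through the avx2intrin.h wrapper, assuming -mavx2:

       #include <immintrin.h>

       __m256i shift_each_left (__m256i v, __m256i counts)
       {
         return _mm256_sllv_epi32 (v, counts);   // __builtin_ia32_psllv8si
       }
*/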
26241 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
26242
26243 /* BMI */
26244 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
26245 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
26246 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
26247
26248 /* TBM */
26249 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
26250 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
26251
26252 /* F16C */
26253 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
26254 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
26255 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
26256 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
26257
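/* For illustration: the F16C rows convert between packed half-precision
   values (carried in integer vectors) and single precision; vcvtps2ph also
   takes a rounding-control immediate, hence the trailing _INT in its
   function type.  A minimal sketch through f16cintrin.h, assuming -mf16c:

       #include <immintrin.h>

       __m128i to_half (__m128 v)
       {
         return _mm_cvtps_ph (v, 0);   // __builtin_ia32_vcvtps2ph, imm 0 = round to nearest
       }
*/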
26258 /* BMI2 */
26259 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
26260 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
26261 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
26262 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
26263 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
26264 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
26265 };
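/* For illustration: each initializer above is { isa mask, insn code,
   builtin name, builtin enum, sub-code, function-type enum }.  The fifth
   field is UNKNOWN for plain builtins, a comparison code for the
   ptest/vtest rows, and a ROUND_* constant for the *_ROUND rows, which is
   how a single pattern such as CODE_FOR_sse4_1_roundpd can back
   __builtin_ia32_floorpd, __builtin_ia32_ceilpd, __builtin_ia32_truncpd
   and __builtin_ia32_rintpd with different fixed rounding immediates,
   while __builtin_ia32_roundpd keeps the immediate as an explicit
   argument.  A minimal user-level sketch of the explicit-immediate form,
   assuming -msse4.1:

       #include <smmintrin.h>

       __m128d floor_pd (__m128d v)
       {
         return _mm_round_pd (v, _MM_FROUND_FLOOR);   // __builtin_ia32_roundpd
       }
*/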
26266
26267 /* FMA4 and XOP. */
26268 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
26269 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
26270 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
26271 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
26272 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
26273 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
26274 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
26275 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
26276 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
26277 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
26278 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
26279 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
26280 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
26281 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
26282 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
26283 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
26284 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
26285 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
26286 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
26287 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
26288 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
26289 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
26290 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
26291 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
26292 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
26293 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
26294 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
26295 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
26296 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
26297 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
26298 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
26299 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
26300 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
26301 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
26302 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
26303 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
26304 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
26305 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
26306 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
26307 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
26308 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
26309 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
26310 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
26311 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
26312 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
26313 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
26314 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
26315 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
26316 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
26317 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
26318 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
26319 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
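/* For illustration: the MULTI_ARG_* aliases above encode the operand count
   and element type of the FMA4/XOP builtins that follow.  MULTI_ARG_3_SF is
   three V4SF operands; a trailing "2" (MULTI_ARG_3_SF2, MULTI_ARG_1_DF2,
   ...) is the 256-bit widening of the same shape; _IMM replaces the second
   vector with an SI immediate; the _CMP and _TF variants carry an extra
   comparison tag in the function type; and the one-argument _SI_DI/_HI_SI/
   ... forms widen from the first element type named to the second (e.g.
   MULTI_ARG_1_SI_DI is V2DI_FTYPE_V4SI).  For example,
   MULTI_ARG_4_SF2_SI_I1 is simply V8SF_FTYPE_V8SF_V8SF_V8SI_INT, the
   256-bit four-operand form.  */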
26320
26321 static const struct builtin_description bdesc_multi_arg[] =
26322 {
26323 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
26324 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
26325 UNKNOWN, (int)MULTI_ARG_3_SF },
26326 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
26327 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
26328 UNKNOWN, (int)MULTI_ARG_3_DF },
26329
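/* For illustration: the two rows above are the FMA4 scalar fused
   multiply-add builtins; the vfmaddss3/vfmaddsd3 rows that follow are the
   corresponding three-operand FMA (FMA3) builtins, kept under separate
   builtin codes.  A minimal sketch of the FMA4 form through fma4intrin.h,
   assuming -mfma4:

       #include <x86intrin.h>

       __m128 fused_madd_ss (__m128 a, __m128 b, __m128 c)
       {
         return _mm_macc_ss (a, b, c);   // __builtin_ia32_vfmaddss
       }
*/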
26330 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
26331 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
26332 UNKNOWN, (int)MULTI_ARG_3_SF },
26333 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
26334 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
26335 UNKNOWN, (int)MULTI_ARG_3_DF },
26336
26337 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
26338 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
26339 UNKNOWN, (int)MULTI_ARG_3_SF },
26340 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
26341 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
26342 UNKNOWN, (int)MULTI_ARG_3_DF },
26343 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
26344 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
26345 UNKNOWN, (int)MULTI_ARG_3_SF2 },
26346 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
26347 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
26348 UNKNOWN, (int)MULTI_ARG_3_DF2 },
26349
26350 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
26351 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
26352 UNKNOWN, (int)MULTI_ARG_3_SF },
26353 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
26354 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
26355 UNKNOWN, (int)MULTI_ARG_3_DF },
26356 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
26357 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
26358 UNKNOWN, (int)MULTI_ARG_3_SF2 },
26359 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
26360 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
26361 UNKNOWN, (int)MULTI_ARG_3_DF2 },
26362
26363 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
26364 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
26365 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
26366 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
26367 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
26368 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
26369 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
26370
26371 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
26372 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
26373 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
26374 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
26375 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
26376 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
26377 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
26378
26379 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
26380
26381 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
26382 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
26383 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
26384 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
26385 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
26386 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
26387 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26388 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26389 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26390 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26391 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
26392 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
26393
26394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
26395 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
26396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
26397 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
26398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
26399 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
26400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
26401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
26402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
26403 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
26404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
26405 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
26406 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
26407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
26408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
26409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
26410
26411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
26412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
26413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
26414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
26415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
26416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
26417
26418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
26419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
26420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
26421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
26422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
26423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
26424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
26425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
26426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
26427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
26428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
26429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
26430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
26431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
26432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
26433
26434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
26435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
26436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
26437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
26438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
26439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
26440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
26441
26442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
26443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
26444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
26445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
26446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
26447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
26448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
26449
26450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
26451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
26452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
26453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
26454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
26455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
26456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
26457
26458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
26459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
26460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
26461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
26462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
26463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
26464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
26465
26466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
26467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
26468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
26469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
26470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
26471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
26472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
26473
26474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
26475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
26476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
26477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
26478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
26479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
26480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
26481
26482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
26483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
26484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
26485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
26486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
26487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
26488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
26489
26490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
26491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
26492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
26493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
26494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
26495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
26496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
26497
26498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
26499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
26500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
26501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
26502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
26503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
26504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
26505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
26506
26507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
26508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
26509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
26510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
26511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
26512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
26513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
26514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
26515
26516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
26517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
26518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
26519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
26520
26521 };
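
/* Note on the table above: several entries intentionally share one
   IX86_BUILTIN_* code.  For example both "__builtin_ia32_vpcomneb" and
   "__builtin_ia32_vpcomneqb" map to IX86_BUILTIN_VPCOMNEB with the NE
   comparison, so the "neq" spellings are simple aliases of the "ne"
   builtins and expand identically.  */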
26522
26523 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
26524 not in the current target ISA, so that the user can compile particular
26525 modules with target-specific options that differ from the command-line
26526 options. */
26527 static void
26528 ix86_init_mmx_sse_builtins (void)
26529 {
26530 const struct builtin_description * d;
26531 enum ix86_builtin_func_type ftype;
26532 size_t i;
26533
26534 /* Add all special builtins with a variable number of operands. */
26535 for (i = 0, d = bdesc_special_args;
26536 i < ARRAY_SIZE (bdesc_special_args);
26537 i++, d++)
26538 {
26539 if (d->name == 0)
26540 continue;
26541
26542 ftype = (enum ix86_builtin_func_type) d->flag;
26543 def_builtin (d->mask, d->name, ftype, d->code);
26544 }
26545
26546 /* Add all builtins with a variable number of operands. */
26547 for (i = 0, d = bdesc_args;
26548 i < ARRAY_SIZE (bdesc_args);
26549 i++, d++)
26550 {
26551 if (d->name == 0)
26552 continue;
26553
26554 ftype = (enum ix86_builtin_func_type) d->flag;
26555 def_builtin_const (d->mask, d->name, ftype, d->code);
26556 }
26557
26558 /* pcmpestr[im] insns. */
26559 for (i = 0, d = bdesc_pcmpestr;
26560 i < ARRAY_SIZE (bdesc_pcmpestr);
26561 i++, d++)
26562 {
26563 if (d->code == IX86_BUILTIN_PCMPESTRM128)
26564 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
26565 else
26566 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
26567 def_builtin_const (d->mask, d->name, ftype, d->code);
26568 }
26569
26570 /* pcmpistr[im] insns. */
26571 for (i = 0, d = bdesc_pcmpistr;
26572 i < ARRAY_SIZE (bdesc_pcmpistr);
26573 i++, d++)
26574 {
26575 if (d->code == IX86_BUILTIN_PCMPISTRM128)
26576 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
26577 else
26578 ftype = INT_FTYPE_V16QI_V16QI_INT;
26579 def_builtin_const (d->mask, d->name, ftype, d->code);
26580 }
26581
26582 /* comi/ucomi insns. */
26583 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
26584 {
26585 if (d->mask == OPTION_MASK_ISA_SSE2)
26586 ftype = INT_FTYPE_V2DF_V2DF;
26587 else
26588 ftype = INT_FTYPE_V4SF_V4SF;
26589 def_builtin_const (d->mask, d->name, ftype, d->code);
26590 }
26591
26592 /* SSE */
26593 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
26594 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
26595 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
26596 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
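
  /* For reference: these two builtins are what the _mm_setcsr and
     _mm_getcsr intrinsics in xmmintrin.h expand to.  A minimal usage
     sketch (user code, not part of this file; the 0x8040 FTZ|DAZ mask
     is only illustrative):

         unsigned int csr = __builtin_ia32_stmxcsr ();
         __builtin_ia32_ldmxcsr (csr | 0x8040);  // enable FTZ and DAZ
  */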
26597
26598 /* SSE or 3DNow!A */
26599 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
26600 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
26601 IX86_BUILTIN_MASKMOVQ);
26602
26603 /* SSE2 */
26604 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
26605 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
26606
26607 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
26608 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
26609 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
26610 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
26611
26612 /* SSE3. */
26613 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
26614 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
26615 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
26616 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
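
  /* For reference: these back the _mm_monitor and _mm_mwait intrinsics
     declared in pmmintrin.h, roughly

         __builtin_ia32_monitor (addr, 0, 0);
         __builtin_ia32_mwait (0, 0);

     where addr is the monitored address; the extension/hint arguments
     shown as 0 are only illustrative.  */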
26617
26618 /* AES */
26619 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
26620 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
26621 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
26622 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
26623 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
26624 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
26625 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
26626 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
26627 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
26628 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
26629 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
26630 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
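
  /* For reference: the AES builtins above implement the wmmintrin.h
     intrinsics; e.g. _mm_aesenc_si128 is a thin wrapper around
     __builtin_ia32_aesenc128 operating on V2DI (__m128i) values.  */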
26631
26632 /* PCLMUL */
26633 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
26634 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
26635
26636 /* RDRND */
26637 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
26638 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
26639 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
26640 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
26641 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
26642 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
26643 IX86_BUILTIN_RDRAND64_STEP);
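
  /* For reference: the *_step builtins write a random value through the
     pointer argument and return the carry flag, i.e. nonzero on success.
     A minimal usage sketch (user code, not part of this file; consume ()
     is a placeholder for the caller's code):

         unsigned int r;
         if (__builtin_ia32_rdrand32_step (&r))
           consume (r);

     The immintrin.h _rdrand16/32/64_step intrinsics are thin wrappers
     around these builtins; retrying on failure is left to the caller.  */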
26644
26645 /* AVX2 */
26646 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
26647 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
26648 IX86_BUILTIN_GATHERSIV2DF);
26649
26650 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
26651 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
26652 IX86_BUILTIN_GATHERSIV4DF);
26653
26654 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
26655 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
26656 IX86_BUILTIN_GATHERDIV2DF);
26657
26658 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
26659 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
26660 IX86_BUILTIN_GATHERDIV4DF);
26661
26662 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
26663 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
26664 IX86_BUILTIN_GATHERSIV4SF);
26665
26666 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
26667 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
26668 IX86_BUILTIN_GATHERSIV8SF);
26669
26670 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
26671 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
26672 IX86_BUILTIN_GATHERDIV4SF);
26673
26674 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
26675 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
26676 IX86_BUILTIN_GATHERDIV8SF);
26677
26678 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
26679 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
26680 IX86_BUILTIN_GATHERSIV2DI);
26681
26682 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
26683 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
26684 IX86_BUILTIN_GATHERSIV4DI);
26685
26686 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
26687 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
26688 IX86_BUILTIN_GATHERDIV2DI);
26689
26690 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
26691 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
26692 IX86_BUILTIN_GATHERDIV4DI);
26693
26694 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
26695 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
26696 IX86_BUILTIN_GATHERSIV4SI);
26697
26698 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
26699 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
26700 IX86_BUILTIN_GATHERSIV8SI);
26701
26702 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
26703 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
26704 IX86_BUILTIN_GATHERDIV4SI);
26705
26706 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
26707 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
26708 IX86_BUILTIN_GATHERDIV8SI);
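
  /* For reference: these gather builtins underlie the AVX2 gather
     intrinsics in avx2intrin.h (included from immintrin.h), e.g.
     _mm_i32gather_pd uses __builtin_ia32_gathersiv2df; the trailing INT
     operand is the scale immediate.  The "...div4sf256"/"...div4si256"
     builtin names describe the operands (128-bit result gathered with a
     256-bit, 64-bit-element index), which is presumably why they map to
     the IX86_BUILTIN_GATHERDIV8SF/GATHERDIV8SI codes.  */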
26709
26710 /* MMX access to the vec_init patterns. */
26711 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
26712 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
26713
26714 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
26715 V4HI_FTYPE_HI_HI_HI_HI,
26716 IX86_BUILTIN_VEC_INIT_V4HI);
26717
26718 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
26719 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
26720 IX86_BUILTIN_VEC_INIT_V8QI);
26721
26722 /* Access to the vec_extract patterns. */
26723 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
26724 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
26725 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
26726 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
26727 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
26728 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
26729 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
26730 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
26731 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
26732 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
26733
26734 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
26735 "__builtin_ia32_vec_ext_v4hi",
26736 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
26737
26738 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
26739 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
26740
26741 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
26742 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
26743
26744 /* Access to the vec_set patterns. */
26745 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
26746 "__builtin_ia32_vec_set_v2di",
26747 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
26748
26749 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
26750 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
26751
26752 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
26753 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
26754
26755 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
26756 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
26757
26758 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
26759 "__builtin_ia32_vec_set_v4hi",
26760 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
26761
26762 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
26763 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
26764
26765 /* Add the multi-arg builtin instructions (FMA4, FMA and XOP). */
26766 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
26767 {
26768 if (d->name == 0)
26769 continue;
26770
26771 ftype = (enum ix86_builtin_func_type) d->flag;
26772 def_builtin_const (d->mask, d->name, ftype, d->code);
26773 }
26774 }
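
/* As the comment above ix86_init_mmx_sse_builtins explains, builtins are
   registered even when their ISA is not enabled on the command line; the
   ISA requirement is checked when the builtin is expanded.  A sketch of
   the intended use (user code, not part of this file), compiled without
   -mxop so that only f carries the XOP requirement:

       typedef long long v2di __attribute__ ((vector_size (16)));

       __attribute__ ((target ("xop")))
       v2di f (v2di a, v2di b, v2di c)
       {
         return __builtin_ia32_vpcmov (a, b, c);
       }
*/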
26775
26776 /* Internal helper for ix86_init_builtins. */
26777
26778 static void
26779 ix86_init_builtins_va_builtins_abi (void)
26780 {
26781 tree ms_va_ref, sysv_va_ref;
26782 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
26783 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
26784 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
26785 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
26786
26787 if (!TARGET_64BIT)
26788 return;
26789 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
26790 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
26791 ms_va_ref = build_reference_type (ms_va_list_type_node);
26792 sysv_va_ref =
26793 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
26794
26795 fnvoid_va_end_ms =
26796 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
26797 fnvoid_va_start_ms =
26798 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
26799 fnvoid_va_end_sysv =
26800 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
26801 fnvoid_va_start_sysv =
26802 build_varargs_function_type_list (void_type_node, sysv_va_ref,
26803 NULL_TREE);
26804 fnvoid_va_copy_ms =
26805 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
26806 NULL_TREE);
26807 fnvoid_va_copy_sysv =
26808 build_function_type_list (void_type_node, sysv_va_ref,
26809 sysv_va_ref, NULL_TREE);
26810
26811 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
26812 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
26813 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
26814 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
26815 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
26816 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
26817 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
26818 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
26819 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
26820 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
26821 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
26822 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
26823 }
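
/* A minimal usage sketch for the builtins registered above (user code,
   not part of this file):

       void __attribute__ ((ms_abi))
       print_all (int count, ...)
       {
         __builtin_ms_va_list ap;
         __builtin_ms_va_start (ap, count);
         // ... __builtin_va_arg (ap, type) as usual ...
         __builtin_ms_va_end (ap);
       }

   The sysv_* variants mirror this for functions explicitly marked
   sysv_abi when the default ABI is the MS one.  */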
26824
26825 static void
26826 ix86_init_builtin_types (void)
26827 {
26828 tree float128_type_node, float80_type_node;
26829
26830 /* The __float80 type. */
26831 float80_type_node = long_double_type_node;
26832 if (TYPE_MODE (float80_type_node) != XFmode)
26833 {
26834 /* long double is not the 80-bit extended type here; build __float80 separately. */
26835 float80_type_node = make_node (REAL_TYPE);
26836
26837 TYPE_PRECISION (float80_type_node) = 80;
26838 layout_type (float80_type_node);
26839 }
26840 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
26841
26842 /* The __float128 type. */
26843 float128_type_node = make_node (REAL_TYPE);
26844 TYPE_PRECISION (float128_type_node) = 128;
26845 layout_type (float128_type_node);
26846 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
26847
26848 /* This macro is built by i386-builtin-types.awk. */
26849 DEFINE_BUILTIN_PRIMITIVE_TYPES;
26850 }
26851
26852 static void
26853 ix86_init_builtins (void)
26854 {
26855 tree t;
26856
26857 ix86_init_builtin_types ();
26858
26859 /* TFmode support builtins. */
26860 def_builtin_const (0, "__builtin_infq",
26861 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
26862 def_builtin_const (0, "__builtin_huge_valq",
26863 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
26864
26865 /* We will expand them to a normal call if SSE2 isn't available, since
26866 they are used by libgcc. */
26867 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
26868 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
26869 BUILT_IN_MD, "__fabstf2", NULL_TREE);
26870 TREE_READONLY (t) = 1;
26871 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
26872
26873 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
26874 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
26875 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
26876 TREE_READONLY (t) = 1;
26877 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
26878
26879 ix86_init_mmx_sse_builtins ();
26880
26881 if (TARGET_LP64)
26882 ix86_init_builtins_va_builtins_abi ();
26883
26884 #ifdef SUBTARGET_INIT_BUILTINS
26885 SUBTARGET_INIT_BUILTINS;
26886 #endif
26887 }
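
/* For reference: __builtin_fabsq and __builtin_copysignq operate on
   __float128 values; when they are not expanded inline, the BUILT_IN_MD
   names __fabstf2 / __copysigntf3 are used for the resulting calls.
   A trivial usage sketch (user code, not part of this file):

       __float128 magnitude (__float128 x) { return __builtin_fabsq (x); }
*/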
26888
26889 /* Return the ix86 builtin for CODE. */
26890
26891 static tree
26892 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
26893 {
26894 if (code >= IX86_BUILTIN_MAX)
26895 return error_mark_node;
26896
26897 return ix86_builtins[code];
26898 }
26899
26900 /* Errors in the source file can cause expand_expr to return const0_rtx
26901 where we expect a vector. To avoid crashing, use one of the vector
26902 clear instructions. */
26903 static rtx
26904 safe_vector_operand (rtx x, enum machine_mode mode)
26905 {
26906 if (x == const0_rtx)
26907 x = CONST0_RTX (mode);
26908 return x;
26909 }
26910
26911 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
26912
26913 static rtx
26914 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
26915 {
26916 rtx pat;
26917 tree arg0 = CALL_EXPR_ARG (exp, 0);
26918 tree arg1 = CALL_EXPR_ARG (exp, 1);
26919 rtx op0 = expand_normal (arg0);
26920 rtx op1 = expand_normal (arg1);
26921 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26922 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26923 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
26924
26925 if (VECTOR_MODE_P (mode0))
26926 op0 = safe_vector_operand (op0, mode0);
26927 if (VECTOR_MODE_P (mode1))
26928 op1 = safe_vector_operand (op1, mode1);
26929
26930 if (optimize || !target
26931 || GET_MODE (target) != tmode
26932 || !insn_data[icode].operand[0].predicate (target, tmode))
26933 target = gen_reg_rtx (tmode);
26934
26935 if (GET_MODE (op1) == SImode && mode1 == TImode)
26936 {
26937 rtx x = gen_reg_rtx (V4SImode);
26938 emit_insn (gen_sse2_loadd (x, op1));
26939 op1 = gen_lowpart (TImode, x);
26940 }
26941
26942 if (!insn_data[icode].operand[1].predicate (op0, mode0))
26943 op0 = copy_to_mode_reg (mode0, op0);
26944 if (!insn_data[icode].operand[2].predicate (op1, mode1))
26945 op1 = copy_to_mode_reg (mode1, op1);
26946
26947 pat = GEN_FCN (icode) (target, op0, op1);
26948 if (! pat)
26949 return 0;
26950
26951 emit_insn (pat);
26952
26953 return target;
26954 }
26955
26956 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
26957
26958 static rtx
26959 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
26960 enum ix86_builtin_func_type m_type,
26961 enum rtx_code sub_code)
26962 {
26963 rtx pat;
26964 int i;
26965 int nargs;
26966 bool comparison_p = false;
26967 bool tf_p = false;
26968 bool last_arg_constant = false;
26969 int num_memory = 0;
26970 struct {
26971 rtx op;
26972 enum machine_mode mode;
26973 } args[4];
26974
26975 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26976
26977 switch (m_type)
26978 {
26979 case MULTI_ARG_4_DF2_DI_I:
26980 case MULTI_ARG_4_DF2_DI_I1:
26981 case MULTI_ARG_4_SF2_SI_I:
26982 case MULTI_ARG_4_SF2_SI_I1:
26983 nargs = 4;
26984 last_arg_constant = true;
26985 break;
26986
26987 case MULTI_ARG_3_SF:
26988 case MULTI_ARG_3_DF:
26989 case MULTI_ARG_3_SF2:
26990 case MULTI_ARG_3_DF2:
26991 case MULTI_ARG_3_DI:
26992 case MULTI_ARG_3_SI:
26993 case MULTI_ARG_3_SI_DI:
26994 case MULTI_ARG_3_HI:
26995 case MULTI_ARG_3_HI_SI:
26996 case MULTI_ARG_3_QI:
26997 case MULTI_ARG_3_DI2:
26998 case MULTI_ARG_3_SI2:
26999 case MULTI_ARG_3_HI2:
27000 case MULTI_ARG_3_QI2:
27001 nargs = 3;
27002 break;
27003
27004 case MULTI_ARG_2_SF:
27005 case MULTI_ARG_2_DF:
27006 case MULTI_ARG_2_DI:
27007 case MULTI_ARG_2_SI:
27008 case MULTI_ARG_2_HI:
27009 case MULTI_ARG_2_QI:
27010 nargs = 2;
27011 break;
27012
27013 case MULTI_ARG_2_DI_IMM:
27014 case MULTI_ARG_2_SI_IMM:
27015 case MULTI_ARG_2_HI_IMM:
27016 case MULTI_ARG_2_QI_IMM:
27017 nargs = 2;
27018 last_arg_constant = true;
27019 break;
27020
27021 case MULTI_ARG_1_SF:
27022 case MULTI_ARG_1_DF:
27023 case MULTI_ARG_1_SF2:
27024 case MULTI_ARG_1_DF2:
27025 case MULTI_ARG_1_DI:
27026 case MULTI_ARG_1_SI:
27027 case MULTI_ARG_1_HI:
27028 case MULTI_ARG_1_QI:
27029 case MULTI_ARG_1_SI_DI:
27030 case MULTI_ARG_1_HI_DI:
27031 case MULTI_ARG_1_HI_SI:
27032 case MULTI_ARG_1_QI_DI:
27033 case MULTI_ARG_1_QI_SI:
27034 case MULTI_ARG_1_QI_HI:
27035 nargs = 1;
27036 break;
27037
27038 case MULTI_ARG_2_DI_CMP:
27039 case MULTI_ARG_2_SI_CMP:
27040 case MULTI_ARG_2_HI_CMP:
27041 case MULTI_ARG_2_QI_CMP:
27042 nargs = 2;
27043 comparison_p = true;
27044 break;
27045
27046 case MULTI_ARG_2_SF_TF:
27047 case MULTI_ARG_2_DF_TF:
27048 case MULTI_ARG_2_DI_TF:
27049 case MULTI_ARG_2_SI_TF:
27050 case MULTI_ARG_2_HI_TF:
27051 case MULTI_ARG_2_QI_TF:
27052 nargs = 2;
27053 tf_p = true;
27054 break;
27055
27056 default:
27057 gcc_unreachable ();
27058 }
27059
27060 if (optimize || !target
27061 || GET_MODE (target) != tmode
27062 || !insn_data[icode].operand[0].predicate (target, tmode))
27063 target = gen_reg_rtx (tmode);
27064
27065 gcc_assert (nargs <= 4);
27066
27067 for (i = 0; i < nargs; i++)
27068 {
27069 tree arg = CALL_EXPR_ARG (exp, i);
27070 rtx op = expand_normal (arg);
27071 int adjust = (comparison_p) ? 1 : 0;
27072 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
27073
27074 if (last_arg_constant && i == nargs - 1)
27075 {
27076 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
27077 {
27078 enum insn_code new_icode = icode;
27079 switch (icode)
27080 {
27081 case CODE_FOR_xop_vpermil2v2df3:
27082 case CODE_FOR_xop_vpermil2v4sf3:
27083 case CODE_FOR_xop_vpermil2v4df3:
27084 case CODE_FOR_xop_vpermil2v8sf3:
27085 error ("the last argument must be a 2-bit immediate");
27086 return gen_reg_rtx (tmode);
27087 case CODE_FOR_xop_rotlv2di3:
27088 new_icode = CODE_FOR_rotlv2di3;
27089 goto xop_rotl;
27090 case CODE_FOR_xop_rotlv4si3:
27091 new_icode = CODE_FOR_rotlv4si3;
27092 goto xop_rotl;
27093 case CODE_FOR_xop_rotlv8hi3:
27094 new_icode = CODE_FOR_rotlv8hi3;
27095 goto xop_rotl;
27096 case CODE_FOR_xop_rotlv16qi3:
27097 new_icode = CODE_FOR_rotlv16qi3;
27098 xop_rotl:
27099 if (CONST_INT_P (op))
27100 {
27101 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
27102 op = GEN_INT (INTVAL (op) & mask);
27103 gcc_checking_assert
27104 (insn_data[icode].operand[i + 1].predicate (op, mode));
27105 }
27106 else
27107 {
27108 gcc_checking_assert
27109 (nargs == 2
27110 && insn_data[new_icode].operand[0].mode == tmode
27111 && insn_data[new_icode].operand[1].mode == tmode
27112 && insn_data[new_icode].operand[2].mode == mode
27113 && insn_data[new_icode].operand[0].predicate
27114 == insn_data[icode].operand[0].predicate
27115 && insn_data[new_icode].operand[1].predicate
27116 == insn_data[icode].operand[1].predicate);
27117 icode = new_icode;
27118 goto non_constant;
27119 }
27120 break;
27121 default:
27122 gcc_unreachable ();
27123 }
27124 }
27125 }
27126 else
27127 {
27128 non_constant:
27129 if (VECTOR_MODE_P (mode))
27130 op = safe_vector_operand (op, mode);
27131
27132 /* If we aren't optimizing, only allow one memory operand to be
27133 generated. */
27134 if (memory_operand (op, mode))
27135 num_memory++;
27136
27137 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
27138
27139 if (optimize
27140 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
27141 || num_memory > 1)
27142 op = force_reg (mode, op);
27143 }
27144
27145 args[i].op = op;
27146 args[i].mode = mode;
27147 }
27148
27149 switch (nargs)
27150 {
27151 case 1:
27152 pat = GEN_FCN (icode) (target, args[0].op);
27153 break;
27154
27155 case 2:
27156 if (tf_p)
27157 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
27158 GEN_INT ((int)sub_code));
27159 else if (! comparison_p)
27160 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27161 else
27162 {
27163 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
27164 args[0].op,
27165 args[1].op);
27166
27167 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
27168 }
27169 break;
27170
27171 case 3:
27172 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27173 break;
27174
27175 case 4:
27176 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
27177 break;
27178
27179 default:
27180 gcc_unreachable ();
27181 }
27182
27183 if (! pat)
27184 return 0;
27185
27186 emit_insn (pat);
27187 return target;
27188 }
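
/* Example of the two-operand comparison case above: for a builtin such as
   __builtin_ia32_vpcomltb (MULTI_ARG_2_QI_CMP, SUB_CODE is LT), the
   xop_maskcmpv16qi3 pattern is generated with operands
   (target, (lt:V16QI a b), a, b), i.e. the comparison rtx built from
   SUB_CODE becomes operand 1, followed by the original operands.  */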
27189
27190 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
27191 insns with vec_merge. */
27192
27193 static rtx
27194 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
27195 rtx target)
27196 {
27197 rtx pat;
27198 tree arg0 = CALL_EXPR_ARG (exp, 0);
27199 rtx op1, op0 = expand_normal (arg0);
27200 enum machine_mode tmode = insn_data[icode].operand[0].mode;
27201 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
27202
27203 if (optimize || !target
27204 || GET_MODE (target) != tmode
27205 || !insn_data[icode].operand[0].predicate (target, tmode))
27206 target = gen_reg_rtx (tmode);
27207
27208 if (VECTOR_MODE_P (mode0))
27209 op0 = safe_vector_operand (op0, mode0);
27210
27211 if ((optimize && !register_operand (op0, mode0))
27212 || !insn_data[icode].operand[1].predicate (op0, mode0))
27213 op0 = copy_to_mode_reg (mode0, op0);
27214
27215 op1 = op0;
27216 if (!insn_data[icode].operand[2].predicate (op1, mode0))
27217 op1 = copy_to_mode_reg (mode0, op1);
27218
27219 pat = GEN_FCN (icode) (target, op0, op1);
27220 if (! pat)
27221 return 0;
27222 emit_insn (pat);
27223 return target;
27224 }
27225
27226 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
27227
27228 static rtx
27229 ix86_expand_sse_compare (const struct builtin_description *d,
27230 tree exp, rtx target, bool swap)
27231 {
27232 rtx pat;
27233 tree arg0 = CALL_EXPR_ARG (exp, 0);
27234 tree arg1 = CALL_EXPR_ARG (exp, 1);
27235 rtx op0 = expand_normal (arg0);
27236 rtx op1 = expand_normal (arg1);
27237 rtx op2;
27238 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
27239 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
27240 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
27241 enum rtx_code comparison = d->comparison;
27242
27243 if (VECTOR_MODE_P (mode0))
27244 op0 = safe_vector_operand (op0, mode0);
27245 if (VECTOR_MODE_P (mode1))
27246 op1 = safe_vector_operand (op1, mode1);
27247
27248 /* Swap operands if we have a comparison that isn't available in
27249 hardware. */
27250 if (swap)
27251 {
27252 rtx tmp = gen_reg_rtx (mode1);
27253 emit_move_insn (tmp, op1);
27254 op1 = op0;
27255 op0 = tmp;
27256 }
27257
27258 if (optimize || !target
27259 || GET_MODE (target) != tmode
27260 || !insn_data[d->icode].operand[0].predicate (target, tmode))
27261 target = gen_reg_rtx (tmode);
27262
27263 if ((optimize && !register_operand (op0, mode0))
27264 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
27265 op0 = copy_to_mode_reg (mode0, op0);
27266 if ((optimize && !register_operand (op1, mode1))
27267 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
27268 op1 = copy_to_mode_reg (mode1, op1);
27269
27270 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
27271 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
27272 if (! pat)
27273 return 0;
27274 emit_insn (pat);
27275 return target;
27276 }
27277
27278 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
27279
27280 static rtx
27281 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
27282 rtx target)
27283 {
27284 rtx pat;
27285 tree arg0 = CALL_EXPR_ARG (exp, 0);
27286 tree arg1 = CALL_EXPR_ARG (exp, 1);
27287 rtx op0 = expand_normal (arg0);
27288 rtx op1 = expand_normal (arg1);
27289 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
27290 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
27291 enum rtx_code comparison = d->comparison;
27292
27293 if (VECTOR_MODE_P (mode0))
27294 op0 = safe_vector_operand (op0, mode0);
27295 if (VECTOR_MODE_P (mode1))
27296 op1 = safe_vector_operand (op1, mode1);
27297
27298 /* Swap operands if we have a comparison that isn't available in
27299 hardware. */
27300 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
27301 {
27302 rtx tmp = op1;
27303 op1 = op0;
27304 op0 = tmp;
27305 }
27306
27307 target = gen_reg_rtx (SImode);
27308 emit_move_insn (target, const0_rtx);
27309 target = gen_rtx_SUBREG (QImode, target, 0);
27310
27311 if ((optimize && !register_operand (op0, mode0))
27312 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
27313 op0 = copy_to_mode_reg (mode0, op0);
27314 if ((optimize && !register_operand (op1, mode1))
27315 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
27316 op1 = copy_to_mode_reg (mode1, op1);
27317
27318 pat = GEN_FCN (d->icode) (op0, op1);
27319 if (! pat)
27320 return 0;
27321 emit_insn (pat);
27322 emit_insn (gen_rtx_SET (VOIDmode,
27323 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27324 gen_rtx_fmt_ee (comparison, QImode,
27325 SET_DEST (pat),
27326 const0_rtx)));
27327
27328 return SUBREG_REG (target);
27329 }
27330
27331 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
27332
27333 static rtx
27334 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
27335 rtx target)
27336 {
27337 rtx pat;
27338 tree arg0 = CALL_EXPR_ARG (exp, 0);
27339 rtx op1, op0 = expand_normal (arg0);
27340 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
27341 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
27342
27343 if (optimize || target == 0
27344 || GET_MODE (target) != tmode
27345 || !insn_data[d->icode].operand[0].predicate (target, tmode))
27346 target = gen_reg_rtx (tmode);
27347
27348 if (VECTOR_MODE_P (mode0))
27349 op0 = safe_vector_operand (op0, mode0);
27350
27351 if ((optimize && !register_operand (op0, mode0))
27352 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
27353 op0 = copy_to_mode_reg (mode0, op0);
27354
27355 op1 = GEN_INT (d->comparison);
27356
27357 pat = GEN_FCN (d->icode) (target, op0, op1);
27358 if (! pat)
27359 return 0;
27360 emit_insn (pat);
27361 return target;
27362 }
27363
27364 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
27365
27366 static rtx
27367 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
27368 rtx target)
27369 {
27370 rtx pat;
27371 tree arg0 = CALL_EXPR_ARG (exp, 0);
27372 tree arg1 = CALL_EXPR_ARG (exp, 1);
27373 rtx op0 = expand_normal (arg0);
27374 rtx op1 = expand_normal (arg1);
27375 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
27376 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
27377 enum rtx_code comparison = d->comparison;
27378
27379 if (VECTOR_MODE_P (mode0))
27380 op0 = safe_vector_operand (op0, mode0);
27381 if (VECTOR_MODE_P (mode1))
27382 op1 = safe_vector_operand (op1, mode1);
27383
27384 target = gen_reg_rtx (SImode);
27385 emit_move_insn (target, const0_rtx);
27386 target = gen_rtx_SUBREG (QImode, target, 0);
27387
27388 if ((optimize && !register_operand (op0, mode0))
27389 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
27390 op0 = copy_to_mode_reg (mode0, op0);
27391 if ((optimize && !register_operand (op1, mode1))
27392 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
27393 op1 = copy_to_mode_reg (mode1, op1);
27394
27395 pat = GEN_FCN (d->icode) (op0, op1);
27396 if (! pat)
27397 return 0;
27398 emit_insn (pat);
27399 emit_insn (gen_rtx_SET (VOIDmode,
27400 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27401 gen_rtx_fmt_ee (comparison, QImode,
27402 SET_DEST (pat),
27403 const0_rtx)));
27404
27405 return SUBREG_REG (target);
27406 }
27407
27408 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
27409
27410 static rtx
27411 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
27412 tree exp, rtx target)
27413 {
27414 rtx pat;
27415 tree arg0 = CALL_EXPR_ARG (exp, 0);
27416 tree arg1 = CALL_EXPR_ARG (exp, 1);
27417 tree arg2 = CALL_EXPR_ARG (exp, 2);
27418 tree arg3 = CALL_EXPR_ARG (exp, 3);
27419 tree arg4 = CALL_EXPR_ARG (exp, 4);
27420 rtx scratch0, scratch1;
27421 rtx op0 = expand_normal (arg0);
27422 rtx op1 = expand_normal (arg1);
27423 rtx op2 = expand_normal (arg2);
27424 rtx op3 = expand_normal (arg3);
27425 rtx op4 = expand_normal (arg4);
27426 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
27427
27428 tmode0 = insn_data[d->icode].operand[0].mode;
27429 tmode1 = insn_data[d->icode].operand[1].mode;
27430 modev2 = insn_data[d->icode].operand[2].mode;
27431 modei3 = insn_data[d->icode].operand[3].mode;
27432 modev4 = insn_data[d->icode].operand[4].mode;
27433 modei5 = insn_data[d->icode].operand[5].mode;
27434 modeimm = insn_data[d->icode].operand[6].mode;
27435
27436 if (VECTOR_MODE_P (modev2))
27437 op0 = safe_vector_operand (op0, modev2);
27438 if (VECTOR_MODE_P (modev4))
27439 op2 = safe_vector_operand (op2, modev4);
27440
27441 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
27442 op0 = copy_to_mode_reg (modev2, op0);
27443 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
27444 op1 = copy_to_mode_reg (modei3, op1);
27445 if ((optimize && !register_operand (op2, modev4))
27446 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
27447 op2 = copy_to_mode_reg (modev4, op2);
27448 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
27449 op3 = copy_to_mode_reg (modei5, op3);
27450
27451 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
27452 {
27453 error ("the fifth argument must be an 8-bit immediate");
27454 return const0_rtx;
27455 }
27456
27457 if (d->code == IX86_BUILTIN_PCMPESTRI128)
27458 {
27459 if (optimize || !target
27460 || GET_MODE (target) != tmode0
27461 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
27462 target = gen_reg_rtx (tmode0);
27463
27464 scratch1 = gen_reg_rtx (tmode1);
27465
27466 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
27467 }
27468 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
27469 {
27470 if (optimize || !target
27471 || GET_MODE (target) != tmode1
27472 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
27473 target = gen_reg_rtx (tmode1);
27474
27475 scratch0 = gen_reg_rtx (tmode0);
27476
27477 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
27478 }
27479 else
27480 {
27481 gcc_assert (d->flag);
27482
27483 scratch0 = gen_reg_rtx (tmode0);
27484 scratch1 = gen_reg_rtx (tmode1);
27485
27486 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
27487 }
27488
27489 if (! pat)
27490 return 0;
27491
27492 emit_insn (pat);
27493
27494 if (d->flag)
27495 {
27496 target = gen_reg_rtx (SImode);
27497 emit_move_insn (target, const0_rtx);
27498 target = gen_rtx_SUBREG (QImode, target, 0);
27499
27500 emit_insn
27501 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27502 gen_rtx_fmt_ee (EQ, QImode,
27503 gen_rtx_REG ((enum machine_mode) d->flag,
27504 FLAGS_REG),
27505 const0_rtx)));
27506 return SUBREG_REG (target);
27507 }
27508 else
27509 return target;
27510 }
27511
27512
27513 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
27514
27515 static rtx
27516 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
27517 tree exp, rtx target)
27518 {
27519 rtx pat;
27520 tree arg0 = CALL_EXPR_ARG (exp, 0);
27521 tree arg1 = CALL_EXPR_ARG (exp, 1);
27522 tree arg2 = CALL_EXPR_ARG (exp, 2);
27523 rtx scratch0, scratch1;
27524 rtx op0 = expand_normal (arg0);
27525 rtx op1 = expand_normal (arg1);
27526 rtx op2 = expand_normal (arg2);
27527 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
27528
27529 tmode0 = insn_data[d->icode].operand[0].mode;
27530 tmode1 = insn_data[d->icode].operand[1].mode;
27531 modev2 = insn_data[d->icode].operand[2].mode;
27532 modev3 = insn_data[d->icode].operand[3].mode;
27533 modeimm = insn_data[d->icode].operand[4].mode;
27534
27535 if (VECTOR_MODE_P (modev2))
27536 op0 = safe_vector_operand (op0, modev2);
27537 if (VECTOR_MODE_P (modev3))
27538 op1 = safe_vector_operand (op1, modev3);
27539
27540 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
27541 op0 = copy_to_mode_reg (modev2, op0);
27542 if ((optimize && !register_operand (op1, modev3))
27543 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
27544 op1 = copy_to_mode_reg (modev3, op1);
27545
27546 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
27547 {
27548 error ("the third argument must be an 8-bit immediate");
27549 return const0_rtx;
27550 }
27551
27552 if (d->code == IX86_BUILTIN_PCMPISTRI128)
27553 {
27554 if (optimize || !target
27555 || GET_MODE (target) != tmode0
27556 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
27557 target = gen_reg_rtx (tmode0);
27558
27559 scratch1 = gen_reg_rtx (tmode1);
27560
27561 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
27562 }
27563 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
27564 {
27565 if (optimize || !target
27566 || GET_MODE (target) != tmode1
27567 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
27568 target = gen_reg_rtx (tmode1);
27569
27570 scratch0 = gen_reg_rtx (tmode0);
27571
27572 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
27573 }
27574 else
27575 {
27576 gcc_assert (d->flag);
27577
27578 scratch0 = gen_reg_rtx (tmode0);
27579 scratch1 = gen_reg_rtx (tmode1);
27580
27581 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
27582 }
27583
27584 if (! pat)
27585 return 0;
27586
27587 emit_insn (pat);
27588
27589 if (d->flag)
27590 {
27591 target = gen_reg_rtx (SImode);
27592 emit_move_insn (target, const0_rtx);
27593 target = gen_rtx_SUBREG (QImode, target, 0);
27594
27595 emit_insn
27596 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27597 gen_rtx_fmt_ee (EQ, QImode,
27598 gen_rtx_REG ((enum machine_mode) d->flag,
27599 FLAGS_REG),
27600 const0_rtx)));
27601 return SUBREG_REG (target);
27602 }
27603 else
27604 return target;
27605 }
27606
27607 /* Subroutine of ix86_expand_builtin to take care of insns with
27608 variable number of operands. */
27609
27610 static rtx
27611 ix86_expand_args_builtin (const struct builtin_description *d,
27612 tree exp, rtx target)
27613 {
27614 rtx pat, real_target;
27615 unsigned int i, nargs;
27616 unsigned int nargs_constant = 0;
27617 int num_memory = 0;
27618 struct
27619 {
27620 rtx op;
27621 enum machine_mode mode;
27622 } args[4];
27623 bool last_arg_count = false;
27624 enum insn_code icode = d->icode;
27625 const struct insn_data_d *insn_p = &insn_data[icode];
27626 enum machine_mode tmode = insn_p->operand[0].mode;
27627 enum machine_mode rmode = VOIDmode;
27628 bool swap = false;
27629 enum rtx_code comparison = d->comparison;
27630
27631 switch ((enum ix86_builtin_func_type) d->flag)
27632 {
27633 case V2DF_FTYPE_V2DF_ROUND:
27634 case V4DF_FTYPE_V4DF_ROUND:
27635 case V4SF_FTYPE_V4SF_ROUND:
27636 case V8SF_FTYPE_V8SF_ROUND:
27637 return ix86_expand_sse_round (d, exp, target);
27638 case INT_FTYPE_V8SF_V8SF_PTEST:
27639 case INT_FTYPE_V4DI_V4DI_PTEST:
27640 case INT_FTYPE_V4DF_V4DF_PTEST:
27641 case INT_FTYPE_V4SF_V4SF_PTEST:
27642 case INT_FTYPE_V2DI_V2DI_PTEST:
27643 case INT_FTYPE_V2DF_V2DF_PTEST:
27644 return ix86_expand_sse_ptest (d, exp, target);
27645 case FLOAT128_FTYPE_FLOAT128:
27646 case FLOAT_FTYPE_FLOAT:
27647 case INT_FTYPE_INT:
27648 case UINT64_FTYPE_INT:
27649 case UINT16_FTYPE_UINT16:
27650 case INT64_FTYPE_INT64:
27651 case INT64_FTYPE_V4SF:
27652 case INT64_FTYPE_V2DF:
27653 case INT_FTYPE_V16QI:
27654 case INT_FTYPE_V8QI:
27655 case INT_FTYPE_V8SF:
27656 case INT_FTYPE_V4DF:
27657 case INT_FTYPE_V4SF:
27658 case INT_FTYPE_V2DF:
27659 case INT_FTYPE_V32QI:
27660 case V16QI_FTYPE_V16QI:
27661 case V8SI_FTYPE_V8SF:
27662 case V8SI_FTYPE_V4SI:
27663 case V8HI_FTYPE_V8HI:
27664 case V8HI_FTYPE_V16QI:
27665 case V8QI_FTYPE_V8QI:
27666 case V8SF_FTYPE_V8SF:
27667 case V8SF_FTYPE_V8SI:
27668 case V8SF_FTYPE_V4SF:
27669 case V8SF_FTYPE_V8HI:
27670 case V4SI_FTYPE_V4SI:
27671 case V4SI_FTYPE_V16QI:
27672 case V4SI_FTYPE_V4SF:
27673 case V4SI_FTYPE_V8SI:
27674 case V4SI_FTYPE_V8HI:
27675 case V4SI_FTYPE_V4DF:
27676 case V4SI_FTYPE_V2DF:
27677 case V4HI_FTYPE_V4HI:
27678 case V4DF_FTYPE_V4DF:
27679 case V4DF_FTYPE_V4SI:
27680 case V4DF_FTYPE_V4SF:
27681 case V4DF_FTYPE_V2DF:
27682 case V4SF_FTYPE_V4SF:
27683 case V4SF_FTYPE_V4SI:
27684 case V4SF_FTYPE_V8SF:
27685 case V4SF_FTYPE_V4DF:
27686 case V4SF_FTYPE_V8HI:
27687 case V4SF_FTYPE_V2DF:
27688 case V2DI_FTYPE_V2DI:
27689 case V2DI_FTYPE_V16QI:
27690 case V2DI_FTYPE_V8HI:
27691 case V2DI_FTYPE_V4SI:
27692 case V2DF_FTYPE_V2DF:
27693 case V2DF_FTYPE_V4SI:
27694 case V2DF_FTYPE_V4DF:
27695 case V2DF_FTYPE_V4SF:
27696 case V2DF_FTYPE_V2SI:
27697 case V2SI_FTYPE_V2SI:
27698 case V2SI_FTYPE_V4SF:
27699 case V2SI_FTYPE_V2SF:
27700 case V2SI_FTYPE_V2DF:
27701 case V2SF_FTYPE_V2SF:
27702 case V2SF_FTYPE_V2SI:
27703 case V32QI_FTYPE_V32QI:
27704 case V32QI_FTYPE_V16QI:
27705 case V16HI_FTYPE_V16HI:
27706 case V16HI_FTYPE_V8HI:
27707 case V8SI_FTYPE_V8SI:
27708 case V16HI_FTYPE_V16QI:
27709 case V8SI_FTYPE_V16QI:
27710 case V4DI_FTYPE_V16QI:
27711 case V8SI_FTYPE_V8HI:
27712 case V4DI_FTYPE_V8HI:
27713 case V4DI_FTYPE_V4SI:
27714 case V4DI_FTYPE_V2DI:
27715 nargs = 1;
27716 break;
27717 case V4SF_FTYPE_V4SF_VEC_MERGE:
27718 case V2DF_FTYPE_V2DF_VEC_MERGE:
27719 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
27720 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
27721 case V16QI_FTYPE_V16QI_V16QI:
27722 case V16QI_FTYPE_V8HI_V8HI:
27723 case V8QI_FTYPE_V8QI_V8QI:
27724 case V8QI_FTYPE_V4HI_V4HI:
27725 case V8HI_FTYPE_V8HI_V8HI:
27726 case V8HI_FTYPE_V16QI_V16QI:
27727 case V8HI_FTYPE_V4SI_V4SI:
27728 case V8SF_FTYPE_V8SF_V8SF:
27729 case V8SF_FTYPE_V8SF_V8SI:
27730 case V4SI_FTYPE_V4SI_V4SI:
27731 case V4SI_FTYPE_V8HI_V8HI:
27732 case V4SI_FTYPE_V4SF_V4SF:
27733 case V4SI_FTYPE_V2DF_V2DF:
27734 case V4HI_FTYPE_V4HI_V4HI:
27735 case V4HI_FTYPE_V8QI_V8QI:
27736 case V4HI_FTYPE_V2SI_V2SI:
27737 case V4DF_FTYPE_V4DF_V4DF:
27738 case V4DF_FTYPE_V4DF_V4DI:
27739 case V4SF_FTYPE_V4SF_V4SF:
27740 case V4SF_FTYPE_V4SF_V4SI:
27741 case V4SF_FTYPE_V4SF_V2SI:
27742 case V4SF_FTYPE_V4SF_V2DF:
27743 case V4SF_FTYPE_V4SF_DI:
27744 case V4SF_FTYPE_V4SF_SI:
27745 case V2DI_FTYPE_V2DI_V2DI:
27746 case V2DI_FTYPE_V16QI_V16QI:
27747 case V2DI_FTYPE_V4SI_V4SI:
27748 case V2DI_FTYPE_V2DI_V16QI:
27749 case V2DI_FTYPE_V2DF_V2DF:
27750 case V2SI_FTYPE_V2SI_V2SI:
27751 case V2SI_FTYPE_V4HI_V4HI:
27752 case V2SI_FTYPE_V2SF_V2SF:
27753 case V2DF_FTYPE_V2DF_V2DF:
27754 case V2DF_FTYPE_V2DF_V4SF:
27755 case V2DF_FTYPE_V2DF_V2DI:
27756 case V2DF_FTYPE_V2DF_DI:
27757 case V2DF_FTYPE_V2DF_SI:
27758 case V2SF_FTYPE_V2SF_V2SF:
27759 case V1DI_FTYPE_V1DI_V1DI:
27760 case V1DI_FTYPE_V8QI_V8QI:
27761 case V1DI_FTYPE_V2SI_V2SI:
27762 case V32QI_FTYPE_V16HI_V16HI:
27763 case V16HI_FTYPE_V8SI_V8SI:
27764 case V32QI_FTYPE_V32QI_V32QI:
27765 case V16HI_FTYPE_V32QI_V32QI:
27766 case V16HI_FTYPE_V16HI_V16HI:
27767 case V8SI_FTYPE_V8SI_V8SI:
27768 case V8SI_FTYPE_V16HI_V16HI:
27769 case V4DI_FTYPE_V4DI_V4DI:
27770 case V4DI_FTYPE_V8SI_V8SI:
27771 if (comparison == UNKNOWN)
27772 return ix86_expand_binop_builtin (icode, exp, target);
27773 nargs = 2;
27774 break;
27775 case V4SF_FTYPE_V4SF_V4SF_SWAP:
27776 case V2DF_FTYPE_V2DF_V2DF_SWAP:
27777 gcc_assert (comparison != UNKNOWN);
27778 nargs = 2;
27779 swap = true;
27780 break;
27781 case V16HI_FTYPE_V16HI_V8HI_COUNT:
27782 case V16HI_FTYPE_V16HI_SI_COUNT:
27783 case V8SI_FTYPE_V8SI_V4SI_COUNT:
27784 case V8SI_FTYPE_V8SI_SI_COUNT:
27785 case V4DI_FTYPE_V4DI_V2DI_COUNT:
27786 case V4DI_FTYPE_V4DI_INT_COUNT:
27787 case V8HI_FTYPE_V8HI_V8HI_COUNT:
27788 case V8HI_FTYPE_V8HI_SI_COUNT:
27789 case V4SI_FTYPE_V4SI_V4SI_COUNT:
27790 case V4SI_FTYPE_V4SI_SI_COUNT:
27791 case V4HI_FTYPE_V4HI_V4HI_COUNT:
27792 case V4HI_FTYPE_V4HI_SI_COUNT:
27793 case V2DI_FTYPE_V2DI_V2DI_COUNT:
27794 case V2DI_FTYPE_V2DI_SI_COUNT:
27795 case V2SI_FTYPE_V2SI_V2SI_COUNT:
27796 case V2SI_FTYPE_V2SI_SI_COUNT:
27797 case V1DI_FTYPE_V1DI_V1DI_COUNT:
27798 case V1DI_FTYPE_V1DI_SI_COUNT:
27799 nargs = 2;
27800 last_arg_count = true;
27801 break;
27802 case UINT64_FTYPE_UINT64_UINT64:
27803 case UINT_FTYPE_UINT_UINT:
27804 case UINT_FTYPE_UINT_USHORT:
27805 case UINT_FTYPE_UINT_UCHAR:
27806 case UINT16_FTYPE_UINT16_INT:
27807 case UINT8_FTYPE_UINT8_INT:
27808 nargs = 2;
27809 break;
27810 case V2DI_FTYPE_V2DI_INT_CONVERT:
27811 nargs = 2;
27812 rmode = V1TImode;
27813 nargs_constant = 1;
27814 break;
27815 case V8HI_FTYPE_V8HI_INT:
27816 case V8HI_FTYPE_V8SF_INT:
27817 case V8HI_FTYPE_V4SF_INT:
27818 case V8SF_FTYPE_V8SF_INT:
27819 case V4SI_FTYPE_V4SI_INT:
27820 case V4SI_FTYPE_V8SI_INT:
27821 case V4HI_FTYPE_V4HI_INT:
27822 case V4DF_FTYPE_V4DF_INT:
27823 case V4SF_FTYPE_V4SF_INT:
27824 case V4SF_FTYPE_V8SF_INT:
27825 case V2DI_FTYPE_V2DI_INT:
27826 case V2DF_FTYPE_V2DF_INT:
27827 case V2DF_FTYPE_V4DF_INT:
27828 case V16HI_FTYPE_V16HI_INT:
27829 case V8SI_FTYPE_V8SI_INT:
27830 case V4DI_FTYPE_V4DI_INT:
27831 case V2DI_FTYPE_V4DI_INT:
27832 nargs = 2;
27833 nargs_constant = 1;
27834 break;
27835 case V16QI_FTYPE_V16QI_V16QI_V16QI:
27836 case V8SF_FTYPE_V8SF_V8SF_V8SF:
27837 case V4DF_FTYPE_V4DF_V4DF_V4DF:
27838 case V4SF_FTYPE_V4SF_V4SF_V4SF:
27839 case V2DF_FTYPE_V2DF_V2DF_V2DF:
27840 case V32QI_FTYPE_V32QI_V32QI_V32QI:
27841 nargs = 3;
27842 break;
27843 case V32QI_FTYPE_V32QI_V32QI_INT:
27844 case V16HI_FTYPE_V16HI_V16HI_INT:
27845 case V16QI_FTYPE_V16QI_V16QI_INT:
27846 case V4DI_FTYPE_V4DI_V4DI_INT:
27847 case V8HI_FTYPE_V8HI_V8HI_INT:
27848 case V8SI_FTYPE_V8SI_V8SI_INT:
27849 case V8SI_FTYPE_V8SI_V4SI_INT:
27850 case V8SF_FTYPE_V8SF_V8SF_INT:
27851 case V8SF_FTYPE_V8SF_V4SF_INT:
27852 case V4SI_FTYPE_V4SI_V4SI_INT:
27853 case V4DF_FTYPE_V4DF_V4DF_INT:
27854 case V4DF_FTYPE_V4DF_V2DF_INT:
27855 case V4SF_FTYPE_V4SF_V4SF_INT:
27856 case V2DI_FTYPE_V2DI_V2DI_INT:
27857 case V4DI_FTYPE_V4DI_V2DI_INT:
27858 case V2DF_FTYPE_V2DF_V2DF_INT:
27859 nargs = 3;
27860 nargs_constant = 1;
27861 break;
27862 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
27863 nargs = 3;
27864 rmode = V4DImode;
27865 nargs_constant = 1;
27866 break;
27867 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
27868 nargs = 3;
27869 rmode = V2DImode;
27870 nargs_constant = 1;
27871 break;
27872 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
27873 nargs = 3;
27874 rmode = DImode;
27875 nargs_constant = 1;
27876 break;
27877 case V2DI_FTYPE_V2DI_UINT_UINT:
27878 nargs = 3;
27879 nargs_constant = 2;
27880 break;
27881 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
27882 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
27883 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
27884 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
27885 nargs = 4;
27886 nargs_constant = 1;
27887 break;
27888 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
27889 nargs = 4;
27890 nargs_constant = 2;
27891 break;
27892 default:
27893 gcc_unreachable ();
27894 }
27895
27896 gcc_assert (nargs <= ARRAY_SIZE (args));
27897
27898 if (comparison != UNKNOWN)
27899 {
27900 gcc_assert (nargs == 2);
27901 return ix86_expand_sse_compare (d, exp, target, swap);
27902 }
27903
27904 if (rmode == VOIDmode || rmode == tmode)
27905 {
27906 if (optimize
27907 || target == 0
27908 || GET_MODE (target) != tmode
27909 || !insn_p->operand[0].predicate (target, tmode))
27910 target = gen_reg_rtx (tmode);
27911 real_target = target;
27912 }
27913 else
27914 {
27915 target = gen_reg_rtx (rmode);
27916 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
27917 }
27918
27919 for (i = 0; i < nargs; i++)
27920 {
27921 tree arg = CALL_EXPR_ARG (exp, i);
27922 rtx op = expand_normal (arg);
27923 enum machine_mode mode = insn_p->operand[i + 1].mode;
27924 bool match = insn_p->operand[i + 1].predicate (op, mode);
27925
27926 if (last_arg_count && (i + 1) == nargs)
27927 {
27928 /* SIMD shift insns take either an 8-bit immediate or a
27929 register as the count, but the builtin functions take an int
27930 as the count. If the count doesn't match, put it in a register. */
27931 if (!match)
27932 {
27933 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
27934 if (!insn_p->operand[i + 1].predicate (op, mode))
27935 op = copy_to_reg (op);
27936 }
27937 }
27938 else if ((nargs - i) <= nargs_constant)
27939 {
27940 if (!match)
27941 switch (icode)
27942 {
27943 case CODE_FOR_avx2_inserti128:
27944 case CODE_FOR_avx2_extracti128:
27945 error ("the last argument must be a 1-bit immediate");
27946 return const0_rtx;
27947
27948 case CODE_FOR_sse4_1_roundpd:
27949 case CODE_FOR_sse4_1_roundps:
27950 case CODE_FOR_sse4_1_roundsd:
27951 case CODE_FOR_sse4_1_roundss:
27952 case CODE_FOR_sse4_1_blendps:
27953 case CODE_FOR_avx_blendpd256:
27954 case CODE_FOR_avx_vpermilv4df:
27955 case CODE_FOR_avx_roundpd256:
27956 case CODE_FOR_avx_roundps256:
27957 error ("the last argument must be a 4-bit immediate");
27958 return const0_rtx;
27959
27960 case CODE_FOR_sse4_1_blendpd:
27961 case CODE_FOR_avx_vpermilv2df:
27962 case CODE_FOR_xop_vpermil2v2df3:
27963 case CODE_FOR_xop_vpermil2v4sf3:
27964 case CODE_FOR_xop_vpermil2v4df3:
27965 case CODE_FOR_xop_vpermil2v8sf3:
27966 error ("the last argument must be a 2-bit immediate");
27967 return const0_rtx;
27968
27969 case CODE_FOR_avx_vextractf128v4df:
27970 case CODE_FOR_avx_vextractf128v8sf:
27971 case CODE_FOR_avx_vextractf128v8si:
27972 case CODE_FOR_avx_vinsertf128v4df:
27973 case CODE_FOR_avx_vinsertf128v8sf:
27974 case CODE_FOR_avx_vinsertf128v8si:
27975 error ("the last argument must be a 1-bit immediate");
27976 return const0_rtx;
27977
27978 case CODE_FOR_avx_vmcmpv2df3:
27979 case CODE_FOR_avx_vmcmpv4sf3:
27980 case CODE_FOR_avx_cmpv2df3:
27981 case CODE_FOR_avx_cmpv4sf3:
27982 case CODE_FOR_avx_cmpv4df3:
27983 case CODE_FOR_avx_cmpv8sf3:
27984 error ("the last argument must be a 5-bit immediate");
27985 return const0_rtx;
27986
27987 default:
27988 switch (nargs_constant)
27989 {
27990 case 2:
27991 if ((nargs - i) == nargs_constant)
27992 {
27993 error ("the next to last argument must be an 8-bit immediate");
27994 break;
27995 }
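/* FALLTHRU */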
27996 case 1:
27997 error ("the last argument must be an 8-bit immediate");
27998 break;
27999 default:
28000 gcc_unreachable ();
28001 }
28002 return const0_rtx;
28003 }
28004 }
28005 else
28006 {
28007 if (VECTOR_MODE_P (mode))
28008 op = safe_vector_operand (op, mode);
28009
28010 /* If we aren't optimizing, only allow one memory operand to
28011 be generated. */
28012 if (memory_operand (op, mode))
28013 num_memory++;
28014
28015 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
28016 {
28017 if (optimize || !match || num_memory > 1)
28018 op = copy_to_mode_reg (mode, op);
28019 }
28020 else
28021 {
28022 op = copy_to_reg (op);
28023 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
28024 }
28025 }
28026
28027 args[i].op = op;
28028 args[i].mode = mode;
28029 }
28030
28031 switch (nargs)
28032 {
28033 case 1:
28034 pat = GEN_FCN (icode) (real_target, args[0].op);
28035 break;
28036 case 2:
28037 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
28038 break;
28039 case 3:
28040 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
28041 args[2].op);
28042 break;
28043 case 4:
28044 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
28045 args[2].op, args[3].op);
28046 break;
28047 default:
28048 gcc_unreachable ();
28049 }
28050
28051 if (! pat)
28052 return 0;
28053
28054 emit_insn (pat);
28055 return target;
28056 }
28057
28058 /* Subroutine of ix86_expand_builtin to take care of special insns
28059 with variable number of operands. */
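/* A sketch of the dispatch, for illustration only (the builtin names are
   assumed, not defined here): a descriptor of type V2DF_FTYPE_PCDOUBLE
   (e.g. the unaligned-load builtin behind _mm_loadu_pd) takes the "load"
   class with a single pointer argument, while VOID_FTYPE_PDOUBLE_V2DF
   (e.g. the store behind _mm_storeu_pd) takes the "store" class, where the
   memory operand becomes the target and the expander returns 0. */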
28060
28061 static rtx
28062 ix86_expand_special_args_builtin (const struct builtin_description *d,
28063 tree exp, rtx target)
28064 {
28065 tree arg;
28066 rtx pat, op;
28067 unsigned int i, nargs, arg_adjust, memory;
28068 struct
28069 {
28070 rtx op;
28071 enum machine_mode mode;
28072 } args[3];
28073 enum insn_code icode = d->icode;
28074 bool last_arg_constant = false;
28075 const struct insn_data_d *insn_p = &insn_data[icode];
28076 enum machine_mode tmode = insn_p->operand[0].mode;
28077 enum { load, store } klass;
28078
28079 switch ((enum ix86_builtin_func_type) d->flag)
28080 {
28081 case VOID_FTYPE_VOID:
28082 if (icode == CODE_FOR_avx_vzeroupper)
28083 target = GEN_INT (vzeroupper_intrinsic);
28084 emit_insn (GEN_FCN (icode) (target));
28085 return 0;
28086 case VOID_FTYPE_UINT64:
28087 case VOID_FTYPE_UNSIGNED:
28088 nargs = 0;
28089 klass = store;
28090 memory = 0;
28091 break;
28093 case UINT64_FTYPE_VOID:
28094 case UNSIGNED_FTYPE_VOID:
28095 nargs = 0;
28096 klass = load;
28097 memory = 0;
28098 break;
28099 case UINT64_FTYPE_PUNSIGNED:
28100 case V2DI_FTYPE_PV2DI:
28101 case V4DI_FTYPE_PV4DI:
28102 case V32QI_FTYPE_PCCHAR:
28103 case V16QI_FTYPE_PCCHAR:
28104 case V8SF_FTYPE_PCV4SF:
28105 case V8SF_FTYPE_PCFLOAT:
28106 case V4SF_FTYPE_PCFLOAT:
28107 case V4DF_FTYPE_PCV2DF:
28108 case V4DF_FTYPE_PCDOUBLE:
28109 case V2DF_FTYPE_PCDOUBLE:
28110 case VOID_FTYPE_PVOID:
28111 nargs = 1;
28112 klass = load;
28113 memory = 0;
28114 break;
28115 case VOID_FTYPE_PV2SF_V4SF:
28116 case VOID_FTYPE_PV4DI_V4DI:
28117 case VOID_FTYPE_PV2DI_V2DI:
28118 case VOID_FTYPE_PCHAR_V32QI:
28119 case VOID_FTYPE_PCHAR_V16QI:
28120 case VOID_FTYPE_PFLOAT_V8SF:
28121 case VOID_FTYPE_PFLOAT_V4SF:
28122 case VOID_FTYPE_PDOUBLE_V4DF:
28123 case VOID_FTYPE_PDOUBLE_V2DF:
28124 case VOID_FTYPE_PULONGLONG_ULONGLONG:
28125 case VOID_FTYPE_PINT_INT:
28126 nargs = 1;
28127 klass = store;
28128 /* Reserve memory operand for target. */
28129 memory = ARRAY_SIZE (args);
28130 break;
28131 case V4SF_FTYPE_V4SF_PCV2SF:
28132 case V2DF_FTYPE_V2DF_PCDOUBLE:
28133 nargs = 2;
28134 klass = load;
28135 memory = 1;
28136 break;
28137 case V8SF_FTYPE_PCV8SF_V8SI:
28138 case V4DF_FTYPE_PCV4DF_V4DI:
28139 case V4SF_FTYPE_PCV4SF_V4SI:
28140 case V2DF_FTYPE_PCV2DF_V2DI:
28141 case V8SI_FTYPE_PCV8SI_V8SI:
28142 case V4DI_FTYPE_PCV4DI_V4DI:
28143 case V4SI_FTYPE_PCV4SI_V4SI:
28144 case V2DI_FTYPE_PCV2DI_V2DI:
28145 nargs = 2;
28146 klass = load;
28147 memory = 0;
28148 break;
28149 case VOID_FTYPE_PV8SF_V8SI_V8SF:
28150 case VOID_FTYPE_PV4DF_V4DI_V4DF:
28151 case VOID_FTYPE_PV4SF_V4SI_V4SF:
28152 case VOID_FTYPE_PV2DF_V2DI_V2DF:
28153 case VOID_FTYPE_PV8SI_V8SI_V8SI:
28154 case VOID_FTYPE_PV4DI_V4DI_V4DI:
28155 case VOID_FTYPE_PV4SI_V4SI_V4SI:
28156 case VOID_FTYPE_PV2DI_V2DI_V2DI:
28157 nargs = 2;
28158 klass = store;
28159 /* Reserve memory operand for target. */
28160 memory = ARRAY_SIZE (args);
28161 break;
28162 case VOID_FTYPE_UINT_UINT_UINT:
28163 case VOID_FTYPE_UINT64_UINT_UINT:
28164 case UCHAR_FTYPE_UINT_UINT_UINT:
28165 case UCHAR_FTYPE_UINT64_UINT_UINT:
28166 nargs = 3;
28167 klass = load;
28168 memory = ARRAY_SIZE (args);
28169 last_arg_constant = true;
28170 break;
28171 default:
28172 gcc_unreachable ();
28173 }
28174
28175 gcc_assert (nargs <= ARRAY_SIZE (args));
28176
28177 if (klass == store)
28178 {
28179 arg = CALL_EXPR_ARG (exp, 0);
28180 op = expand_normal (arg);
28181 gcc_assert (target == 0);
28182 if (memory)
28183 {
28184 if (GET_MODE (op) != Pmode)
28185 op = convert_to_mode (Pmode, op, 1);
28186 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
28187 }
28188 else
28189 target = force_reg (tmode, op);
28190 arg_adjust = 1;
28191 }
28192 else
28193 {
28194 arg_adjust = 0;
28195 if (optimize
28196 || target == 0
28197 || GET_MODE (target) != tmode
28198 || !insn_p->operand[0].predicate (target, tmode))
28199 target = gen_reg_rtx (tmode);
28200 }
28201
28202 for (i = 0; i < nargs; i++)
28203 {
28204 enum machine_mode mode = insn_p->operand[i + 1].mode;
28205 bool match;
28206
28207 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
28208 op = expand_normal (arg);
28209 match = insn_p->operand[i + 1].predicate (op, mode);
28210
28211 if (last_arg_constant && (i + 1) == nargs)
28212 {
28213 if (!match)
28214 {
28215 if (icode == CODE_FOR_lwp_lwpvalsi3
28216 || icode == CODE_FOR_lwp_lwpinssi3
28217 || icode == CODE_FOR_lwp_lwpvaldi3
28218 || icode == CODE_FOR_lwp_lwpinsdi3)
28219 error ("the last argument must be a 32-bit immediate");
28220 else
28221 error ("the last argument must be an 8-bit immediate");
28222 return const0_rtx;
28223 }
28224 }
28225 else
28226 {
28227 if (i == memory)
28228 {
28229 /* This must be the memory operand. */
28230 if (GET_MODE (op) != Pmode)
28231 op = convert_to_mode (Pmode, op, 1);
28232 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
28233 gcc_assert (GET_MODE (op) == mode
28234 || GET_MODE (op) == VOIDmode);
28235 }
28236 else
28237 {
28238 /* This must be a register. */
28239 if (VECTOR_MODE_P (mode))
28240 op = safe_vector_operand (op, mode);
28241
28242 gcc_assert (GET_MODE (op) == mode
28243 || GET_MODE (op) == VOIDmode);
28244 op = copy_to_mode_reg (mode, op);
28245 }
28246 }
28247
28248 args[i].op = op;
28249 args[i].mode = mode;
28250 }
28251
28252 switch (nargs)
28253 {
28254 case 0:
28255 pat = GEN_FCN (icode) (target);
28256 break;
28257 case 1:
28258 pat = GEN_FCN (icode) (target, args[0].op);
28259 break;
28260 case 2:
28261 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
28262 break;
28263 case 3:
28264 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
28265 break;
28266 default:
28267 gcc_unreachable ();
28268 }
28269
28270 if (! pat)
28271 return 0;
28272 emit_insn (pat);
28273 return klass == store ? 0 : target;
28274 }
28275
28276 /* Return the integer constant in ARG. Constrain it to be in the range
28277 of the subparts of VEC_TYPE; issue an error if not. */
28278
28279 static int
28280 get_element_number (tree vec_type, tree arg)
28281 {
28282 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
28283
28284 if (!host_integerp (arg, 1)
28285 || (elt = tree_low_cst (arg, 1), elt > max))
28286 {
28287 error ("selector must be an integer constant in the range 0..%wi", max);
28288 return 0;
28289 }
28290
28291 return elt;
28292 }
28293
28294 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
28295 ix86_expand_vector_init. We DO have language-level syntax for this, in
28296 the form of (type){ init-list }. Except that since we can't place emms
28297 instructions from inside the compiler, we can't allow the use of MMX
28298 registers unless the user explicitly asks for it. So we do *not* define
28299 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
28300 we have builtins invoked by mmintrin.h that give us license to emit
28301 these sorts of instructions. */
28302
28303 static rtx
28304 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
28305 {
28306 enum machine_mode tmode = TYPE_MODE (type);
28307 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
28308 int i, n_elt = GET_MODE_NUNITS (tmode);
28309 rtvec v = rtvec_alloc (n_elt);
28310
28311 gcc_assert (VECTOR_MODE_P (tmode));
28312 gcc_assert (call_expr_nargs (exp) == n_elt);
28313
28314 for (i = 0; i < n_elt; ++i)
28315 {
28316 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
28317 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
28318 }
28319
28320 if (!target || !register_operand (target, tmode))
28321 target = gen_reg_rtx (tmode);
28322
28323 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
28324 return target;
28325 }
28326
28327 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
28328 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
28329 had a language-level syntax for referencing vector elements. */
28330
28331 static rtx
28332 ix86_expand_vec_ext_builtin (tree exp, rtx target)
28333 {
28334 enum machine_mode tmode, mode0;
28335 tree arg0, arg1;
28336 int elt;
28337 rtx op0;
28338
28339 arg0 = CALL_EXPR_ARG (exp, 0);
28340 arg1 = CALL_EXPR_ARG (exp, 1);
28341
28342 op0 = expand_normal (arg0);
28343 elt = get_element_number (TREE_TYPE (arg0), arg1);
28344
28345 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
28346 mode0 = TYPE_MODE (TREE_TYPE (arg0));
28347 gcc_assert (VECTOR_MODE_P (mode0));
28348
28349 op0 = force_reg (mode0, op0);
28350
28351 if (optimize || !target || !register_operand (target, tmode))
28352 target = gen_reg_rtx (tmode);
28353
28354 ix86_expand_vector_extract (true, target, op0, elt);
28355
28356 return target;
28357 }
28358
28359 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
28360 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
28361 a language-level syntax for referencing vector elements. */
28362
28363 static rtx
28364 ix86_expand_vec_set_builtin (tree exp)
28365 {
28366 enum machine_mode tmode, mode1;
28367 tree arg0, arg1, arg2;
28368 int elt;
28369 rtx op0, op1, target;
28370
28371 arg0 = CALL_EXPR_ARG (exp, 0);
28372 arg1 = CALL_EXPR_ARG (exp, 1);
28373 arg2 = CALL_EXPR_ARG (exp, 2);
28374
28375 tmode = TYPE_MODE (TREE_TYPE (arg0));
28376 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
28377 gcc_assert (VECTOR_MODE_P (tmode));
28378
28379 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
28380 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
28381 elt = get_element_number (TREE_TYPE (arg0), arg2);
28382
28383 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
28384 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
28385
28386 op0 = force_reg (tmode, op0);
28387 op1 = force_reg (mode1, op1);
28388
28389 /* OP0 is the source of these builtin functions and shouldn't be
28390 modified. Create a copy, use it and return it as target. */
28391 target = gen_reg_rtx (tmode);
28392 emit_move_insn (target, op0);
28393 ix86_expand_vector_set (true, target, op1, elt);
28394
28395 return target;
28396 }
28397
28398 /* Expand an expression EXP that calls a built-in function,
28399 with result going to TARGET if that's convenient
28400 (and in mode MODE if that's convenient).
28401 SUBTARGET may be used as the target for computing one of EXP's operands.
28402 IGNORE is nonzero if the value is to be ignored. */
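/* Overview of the dispatch below (descriptive only): a handful of builtins
   (MASKMOVQ, LDMXCSR, MONITOR, the RDRAND and AVX2 gather builtins, ...)
   are expanded inline by the switch; everything else is looked up, in
   order, in bdesc_special_args, bdesc_args, bdesc_comi, bdesc_pcmpestr,
   bdesc_pcmpistr and bdesc_multi_arg and handed to the matching helper. */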
28403
28404 static rtx
28405 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
28406 enum machine_mode mode ATTRIBUTE_UNUSED,
28407 int ignore ATTRIBUTE_UNUSED)
28408 {
28409 const struct builtin_description *d;
28410 size_t i;
28411 enum insn_code icode;
28412 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
28413 tree arg0, arg1, arg2, arg3, arg4;
28414 rtx op0, op1, op2, op3, op4, pat;
28415 enum machine_mode mode0, mode1, mode2, mode3, mode4;
28416 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
28417
28418 /* Determine whether the builtin function is available under the current ISA.
28419 Originally the builtin was not created if it wasn't applicable to the
28420 current ISA based on the command line switches. With function specific
28421 options, we need to check in the context of the function making the call
28422 whether it is supported. */
28423 if (ix86_builtins_isa[fcode].isa
28424 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
28425 {
28426 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
28427 NULL, (enum fpmath_unit) 0, false);
28428
28429 if (!opts)
28430 error ("%qE needs unknown isa option", fndecl);
28431 else
28432 {
28433 gcc_assert (opts != NULL);
28434 error ("%qE needs isa option %s", fndecl, opts);
28435 free (opts);
28436 }
28437 return const0_rtx;
28438 }
28439
28440 switch (fcode)
28441 {
28442 case IX86_BUILTIN_MASKMOVQ:
28443 case IX86_BUILTIN_MASKMOVDQU:
28444 icode = (fcode == IX86_BUILTIN_MASKMOVQ
28445 ? CODE_FOR_mmx_maskmovq
28446 : CODE_FOR_sse2_maskmovdqu);
28447 /* Note the arg order is different from the operand order. */
28448 arg1 = CALL_EXPR_ARG (exp, 0);
28449 arg2 = CALL_EXPR_ARG (exp, 1);
28450 arg0 = CALL_EXPR_ARG (exp, 2);
28451 op0 = expand_normal (arg0);
28452 op1 = expand_normal (arg1);
28453 op2 = expand_normal (arg2);
28454 mode0 = insn_data[icode].operand[0].mode;
28455 mode1 = insn_data[icode].operand[1].mode;
28456 mode2 = insn_data[icode].operand[2].mode;
28457
28458 if (GET_MODE (op0) != Pmode)
28459 op0 = convert_to_mode (Pmode, op0, 1);
28460 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
28461
28462 if (!insn_data[icode].operand[0].predicate (op0, mode0))
28463 op0 = copy_to_mode_reg (mode0, op0);
28464 if (!insn_data[icode].operand[1].predicate (op1, mode1))
28465 op1 = copy_to_mode_reg (mode1, op1);
28466 if (!insn_data[icode].operand[2].predicate (op2, mode2))
28467 op2 = copy_to_mode_reg (mode2, op2);
28468 pat = GEN_FCN (icode) (op0, op1, op2);
28469 if (! pat)
28470 return 0;
28471 emit_insn (pat);
28472 return 0;
28473
28474 case IX86_BUILTIN_LDMXCSR:
28475 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
28476 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
28477 emit_move_insn (target, op0);
28478 emit_insn (gen_sse_ldmxcsr (target));
28479 return 0;
28480
28481 case IX86_BUILTIN_STMXCSR:
28482 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
28483 emit_insn (gen_sse_stmxcsr (target));
28484 return copy_to_mode_reg (SImode, target);
28485
28486 case IX86_BUILTIN_CLFLUSH:
28487 arg0 = CALL_EXPR_ARG (exp, 0);
28488 op0 = expand_normal (arg0);
28489 icode = CODE_FOR_sse2_clflush;
28490 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
28491 {
28492 if (GET_MODE (op0) != Pmode)
28493 op0 = convert_to_mode (Pmode, op0, 1);
28494 op0 = force_reg (Pmode, op0);
28495 }
28496
28497 emit_insn (gen_sse2_clflush (op0));
28498 return 0;
28499
28500 case IX86_BUILTIN_MONITOR:
28501 arg0 = CALL_EXPR_ARG (exp, 0);
28502 arg1 = CALL_EXPR_ARG (exp, 1);
28503 arg2 = CALL_EXPR_ARG (exp, 2);
28504 op0 = expand_normal (arg0);
28505 op1 = expand_normal (arg1);
28506 op2 = expand_normal (arg2);
28507 if (!REG_P (op0))
28508 {
28509 if (GET_MODE (op0) != Pmode)
28510 op0 = convert_to_mode (Pmode, op0, 1);
28511 op0 = force_reg (Pmode, op0);
28512 }
28513 if (!REG_P (op1))
28514 op1 = copy_to_mode_reg (SImode, op1);
28515 if (!REG_P (op2))
28516 op2 = copy_to_mode_reg (SImode, op2);
28517 emit_insn (ix86_gen_monitor (op0, op1, op2));
28518 return 0;
28519
28520 case IX86_BUILTIN_MWAIT:
28521 arg0 = CALL_EXPR_ARG (exp, 0);
28522 arg1 = CALL_EXPR_ARG (exp, 1);
28523 op0 = expand_normal (arg0);
28524 op1 = expand_normal (arg1);
28525 if (!REG_P (op0))
28526 op0 = copy_to_mode_reg (SImode, op0);
28527 if (!REG_P (op1))
28528 op1 = copy_to_mode_reg (SImode, op1);
28529 emit_insn (gen_sse3_mwait (op0, op1));
28530 return 0;
28531
28532 case IX86_BUILTIN_VEC_INIT_V2SI:
28533 case IX86_BUILTIN_VEC_INIT_V4HI:
28534 case IX86_BUILTIN_VEC_INIT_V8QI:
28535 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
28536
28537 case IX86_BUILTIN_VEC_EXT_V2DF:
28538 case IX86_BUILTIN_VEC_EXT_V2DI:
28539 case IX86_BUILTIN_VEC_EXT_V4SF:
28540 case IX86_BUILTIN_VEC_EXT_V4SI:
28541 case IX86_BUILTIN_VEC_EXT_V8HI:
28542 case IX86_BUILTIN_VEC_EXT_V2SI:
28543 case IX86_BUILTIN_VEC_EXT_V4HI:
28544 case IX86_BUILTIN_VEC_EXT_V16QI:
28545 return ix86_expand_vec_ext_builtin (exp, target);
28546
28547 case IX86_BUILTIN_VEC_SET_V2DI:
28548 case IX86_BUILTIN_VEC_SET_V4SF:
28549 case IX86_BUILTIN_VEC_SET_V4SI:
28550 case IX86_BUILTIN_VEC_SET_V8HI:
28551 case IX86_BUILTIN_VEC_SET_V4HI:
28552 case IX86_BUILTIN_VEC_SET_V16QI:
28553 return ix86_expand_vec_set_builtin (exp);
28554
28555 case IX86_BUILTIN_VEC_PERM_V2DF:
28556 case IX86_BUILTIN_VEC_PERM_V4SF:
28557 case IX86_BUILTIN_VEC_PERM_V2DI:
28558 case IX86_BUILTIN_VEC_PERM_V4SI:
28559 case IX86_BUILTIN_VEC_PERM_V8HI:
28560 case IX86_BUILTIN_VEC_PERM_V16QI:
28561 case IX86_BUILTIN_VEC_PERM_V2DI_U:
28562 case IX86_BUILTIN_VEC_PERM_V4SI_U:
28563 case IX86_BUILTIN_VEC_PERM_V8HI_U:
28564 case IX86_BUILTIN_VEC_PERM_V16QI_U:
28565 case IX86_BUILTIN_VEC_PERM_V4DF:
28566 case IX86_BUILTIN_VEC_PERM_V8SF:
28567 return ix86_expand_vec_perm_builtin (exp);
28568
28569 case IX86_BUILTIN_INFQ:
28570 case IX86_BUILTIN_HUGE_VALQ:
28571 {
28572 REAL_VALUE_TYPE inf;
28573 rtx tmp;
28574
28575 real_inf (&inf);
28576 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
28577
28578 tmp = validize_mem (force_const_mem (mode, tmp));
28579
28580 if (target == 0)
28581 target = gen_reg_rtx (mode);
28582
28583 emit_move_insn (target, tmp);
28584 return target;
28585 }
28586
28587 case IX86_BUILTIN_LLWPCB:
28588 arg0 = CALL_EXPR_ARG (exp, 0);
28589 op0 = expand_normal (arg0);
28590 icode = CODE_FOR_lwp_llwpcb;
28591 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
28592 {
28593 if (GET_MODE (op0) != Pmode)
28594 op0 = convert_to_mode (Pmode, op0, 1);
28595 op0 = force_reg (Pmode, op0);
28596 }
28597 emit_insn (gen_lwp_llwpcb (op0));
28598 return 0;
28599
28600 case IX86_BUILTIN_SLWPCB:
28601 icode = CODE_FOR_lwp_slwpcb;
28602 if (!target
28603 || !insn_data[icode].operand[0].predicate (target, Pmode))
28604 target = gen_reg_rtx (Pmode);
28605 emit_insn (gen_lwp_slwpcb (target));
28606 return target;
28607
28608 case IX86_BUILTIN_BEXTRI32:
28609 case IX86_BUILTIN_BEXTRI64:
28610 arg0 = CALL_EXPR_ARG (exp, 0);
28611 arg1 = CALL_EXPR_ARG (exp, 1);
28612 op0 = expand_normal (arg0);
28613 op1 = expand_normal (arg1);
28614 icode = (fcode == IX86_BUILTIN_BEXTRI32
28615 ? CODE_FOR_tbm_bextri_si
28616 : CODE_FOR_tbm_bextri_di);
28617 if (!CONST_INT_P (op1))
28618 {
28619 error ("last argument must be an immediate");
28620 return const0_rtx;
28621 }
28622 else
28623 {
28624 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
28625 unsigned char lsb_index = INTVAL (op1) & 0xFF;
28626 op1 = GEN_INT (length);
28627 op2 = GEN_INT (lsb_index);
28628 pat = GEN_FCN (icode) (target, op0, op1, op2);
28629 if (pat)
28630 emit_insn (pat);
28631 return target;
28632 }
28633
28634 case IX86_BUILTIN_RDRAND16_STEP:
28635 icode = CODE_FOR_rdrandhi_1;
28636 mode0 = HImode;
28637 goto rdrand_step;
28638
28639 case IX86_BUILTIN_RDRAND32_STEP:
28640 icode = CODE_FOR_rdrandsi_1;
28641 mode0 = SImode;
28642 goto rdrand_step;
28643
28644 case IX86_BUILTIN_RDRAND64_STEP:
28645 icode = CODE_FOR_rdranddi_1;
28646 mode0 = DImode;
28647
28648 rdrand_step:
28649 op0 = gen_reg_rtx (mode0);
28650 emit_insn (GEN_FCN (icode) (op0));
28651
28652 arg0 = CALL_EXPR_ARG (exp, 0);
28653 op1 = expand_normal (arg0);
28654 if (!address_operand (op1, VOIDmode))
28655 {
28656 op1 = convert_memory_address (Pmode, op1);
28657 op1 = copy_addr_to_reg (op1);
28658 }
28659 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
28660
28661 op1 = gen_reg_rtx (SImode);
28662 emit_move_insn (op1, CONST1_RTX (SImode));
28663
28664 /* Emit SImode conditional move: 1 on success (carry set), otherwise the value just read. */
28665 if (mode0 == HImode)
28666 {
28667 op2 = gen_reg_rtx (SImode);
28668 emit_insn (gen_zero_extendhisi2 (op2, op0));
28669 }
28670 else if (mode0 == SImode)
28671 op2 = op0;
28672 else
28673 op2 = gen_rtx_SUBREG (SImode, op0, 0);
28674
28675 if (target == 0)
28676 target = gen_reg_rtx (SImode);
28677
28678 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
28679 const0_rtx);
28680 emit_insn (gen_rtx_SET (VOIDmode, target,
28681 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
28682 return target;
28683
28684 case IX86_BUILTIN_GATHERSIV2DF:
28685 icode = CODE_FOR_avx2_gathersiv2df;
28686 goto gather_gen;
28687 case IX86_BUILTIN_GATHERSIV4DF:
28688 icode = CODE_FOR_avx2_gathersiv4df;
28689 goto gather_gen;
28690 case IX86_BUILTIN_GATHERDIV2DF:
28691 icode = CODE_FOR_avx2_gatherdiv2df;
28692 goto gather_gen;
28693 case IX86_BUILTIN_GATHERDIV4DF:
28694 icode = CODE_FOR_avx2_gatherdiv4df;
28695 goto gather_gen;
28696 case IX86_BUILTIN_GATHERSIV4SF:
28697 icode = CODE_FOR_avx2_gathersiv4sf;
28698 goto gather_gen;
28699 case IX86_BUILTIN_GATHERSIV8SF:
28700 icode = CODE_FOR_avx2_gathersiv8sf;
28701 goto gather_gen;
28702 case IX86_BUILTIN_GATHERDIV4SF:
28703 icode = CODE_FOR_avx2_gatherdiv4sf;
28704 goto gather_gen;
28705 case IX86_BUILTIN_GATHERDIV8SF:
28706 icode = CODE_FOR_avx2_gatherdiv4sf256;
28707 goto gather_gen;
28708 case IX86_BUILTIN_GATHERSIV2DI:
28709 icode = CODE_FOR_avx2_gathersiv2di;
28710 goto gather_gen;
28711 case IX86_BUILTIN_GATHERSIV4DI:
28712 icode = CODE_FOR_avx2_gathersiv4di;
28713 goto gather_gen;
28714 case IX86_BUILTIN_GATHERDIV2DI:
28715 icode = CODE_FOR_avx2_gatherdiv2di;
28716 goto gather_gen;
28717 case IX86_BUILTIN_GATHERDIV4DI:
28718 icode = CODE_FOR_avx2_gatherdiv4di;
28719 goto gather_gen;
28720 case IX86_BUILTIN_GATHERSIV4SI:
28721 icode = CODE_FOR_avx2_gathersiv4si;
28722 goto gather_gen;
28723 case IX86_BUILTIN_GATHERSIV8SI:
28724 icode = CODE_FOR_avx2_gathersiv8si;
28725 goto gather_gen;
28726 case IX86_BUILTIN_GATHERDIV4SI:
28727 icode = CODE_FOR_avx2_gatherdiv4si;
28728 goto gather_gen;
28729 case IX86_BUILTIN_GATHERDIV8SI:
28730 icode = CODE_FOR_avx2_gatherdiv4si256;
28731
28732 gather_gen:
28733 arg0 = CALL_EXPR_ARG (exp, 0);
28734 arg1 = CALL_EXPR_ARG (exp, 1);
28735 arg2 = CALL_EXPR_ARG (exp, 2);
28736 arg3 = CALL_EXPR_ARG (exp, 3);
28737 arg4 = CALL_EXPR_ARG (exp, 4);
28738 op0 = expand_normal (arg0);
28739 op1 = expand_normal (arg1);
28740 op2 = expand_normal (arg2);
28741 op3 = expand_normal (arg3);
28742 op4 = expand_normal (arg4);
28743 /* Note the arg order is different from the operand order. */
28744 mode0 = insn_data[icode].operand[1].mode;
28745 mode1 = insn_data[icode].operand[2].mode;
28746 mode2 = insn_data[icode].operand[3].mode;
28747 mode3 = insn_data[icode].operand[4].mode;
28748 mode4 = insn_data[icode].operand[5].mode;
28749
28750 if (target == NULL_RTX)
28751 target = gen_reg_rtx (insn_data[icode].operand[0].mode);
28752
28753 /* Force the memory operand to use only a base register here. We
28754 don't want to do this for the memory operands of other builtin
28755 functions. */
28756 if (GET_MODE (op1) != Pmode)
28757 op1 = convert_to_mode (Pmode, op1, 1);
28758 op1 = force_reg (Pmode, op1);
28759 op1 = gen_rtx_MEM (mode1, op1);
28760
28761 if (!insn_data[icode].operand[1].predicate (op0, mode0))
28762 op0 = copy_to_mode_reg (mode0, op0);
28763 if (!insn_data[icode].operand[2].predicate (op1, mode1))
28764 op1 = copy_to_mode_reg (mode1, op1);
28765 if (!insn_data[icode].operand[3].predicate (op2, mode2))
28766 op2 = copy_to_mode_reg (mode2, op2);
28767 if (!insn_data[icode].operand[4].predicate (op3, mode3))
28768 op3 = copy_to_mode_reg (mode3, op3);
28769 if (!insn_data[icode].operand[5].predicate (op4, mode4))
28770 {
28771 error ("last argument must be scale 1, 2, 4, 8");
28772 return const0_rtx;
28773 }
28774 pat = GEN_FCN (icode) (target, op0, op1, op2, op3, op4);
28775 if (! pat)
28776 return const0_rtx;
28777 emit_insn (pat);
28778 return target;
28779
28780 default:
28781 break;
28782 }
28783
28784 for (i = 0, d = bdesc_special_args;
28785 i < ARRAY_SIZE (bdesc_special_args);
28786 i++, d++)
28787 if (d->code == fcode)
28788 return ix86_expand_special_args_builtin (d, exp, target);
28789
28790 for (i = 0, d = bdesc_args;
28791 i < ARRAY_SIZE (bdesc_args);
28792 i++, d++)
28793 if (d->code == fcode)
28794 switch (fcode)
28795 {
28796 case IX86_BUILTIN_FABSQ:
28797 case IX86_BUILTIN_COPYSIGNQ:
28798 if (!TARGET_SSE2)
28799 /* Emit a normal call if SSE2 isn't available. */
28800 return expand_call (exp, target, ignore);
28801 default:
28802 return ix86_expand_args_builtin (d, exp, target);
28803 }
28804
28805 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
28806 if (d->code == fcode)
28807 return ix86_expand_sse_comi (d, exp, target);
28808
28809 for (i = 0, d = bdesc_pcmpestr;
28810 i < ARRAY_SIZE (bdesc_pcmpestr);
28811 i++, d++)
28812 if (d->code == fcode)
28813 return ix86_expand_sse_pcmpestr (d, exp, target);
28814
28815 for (i = 0, d = bdesc_pcmpistr;
28816 i < ARRAY_SIZE (bdesc_pcmpistr);
28817 i++, d++)
28818 if (d->code == fcode)
28819 return ix86_expand_sse_pcmpistr (d, exp, target);
28820
28821 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
28822 if (d->code == fcode)
28823 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
28824 (enum ix86_builtin_func_type)
28825 d->flag, d->comparison);
28826
28827 gcc_unreachable ();
28828 }
28829
28830 /* Returns a function decl for a vectorized version of the builtin function
28831 with builtin function code FN and the result vector type TYPE, or NULL_TREE
28832 if it is not available. */
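/* A worked example taken from the cases below (illustrative only):
   BUILT_IN_SQRT vectorized over V2DF yields IX86_BUILTIN_SQRTPD, and over
   V4DF (with AVX) yields IX86_BUILTIN_SQRTPD256. */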
28833
28834 static tree
28835 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
28836 tree type_in)
28837 {
28838 enum machine_mode in_mode, out_mode;
28839 int in_n, out_n;
28840 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
28841
28842 if (TREE_CODE (type_out) != VECTOR_TYPE
28843 || TREE_CODE (type_in) != VECTOR_TYPE
28844 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
28845 return NULL_TREE;
28846
28847 out_mode = TYPE_MODE (TREE_TYPE (type_out));
28848 out_n = TYPE_VECTOR_SUBPARTS (type_out);
28849 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28850 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28851
28852 switch (fn)
28853 {
28854 case BUILT_IN_SQRT:
28855 if (out_mode == DFmode && in_mode == DFmode)
28856 {
28857 if (out_n == 2 && in_n == 2)
28858 return ix86_builtins[IX86_BUILTIN_SQRTPD];
28859 else if (out_n == 4 && in_n == 4)
28860 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
28861 }
28862 break;
28863
28864 case BUILT_IN_SQRTF:
28865 if (out_mode == SFmode && in_mode == SFmode)
28866 {
28867 if (out_n == 4 && in_n == 4)
28868 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
28869 else if (out_n == 8 && in_n == 8)
28870 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
28871 }
28872 break;
28873
28874 case BUILT_IN_LRINT:
28875 if (out_mode == SImode && out_n == 4
28876 && in_mode == DFmode && in_n == 2)
28877 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
28878 break;
28879
28880 case BUILT_IN_LRINTF:
28881 if (out_mode == SImode && in_mode == SFmode)
28882 {
28883 if (out_n == 4 && in_n == 4)
28884 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
28885 else if (out_n == 8 && in_n == 8)
28886 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
28887 }
28888 break;
28889
28890 case BUILT_IN_COPYSIGN:
28891 if (out_mode == DFmode && in_mode == DFmode)
28892 {
28893 if (out_n == 2 && in_n == 2)
28894 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
28895 else if (out_n == 4 && in_n == 4)
28896 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
28897 }
28898 break;
28899
28900 case BUILT_IN_COPYSIGNF:
28901 if (out_mode == SFmode && in_mode == SFmode)
28902 {
28903 if (out_n == 4 && in_n == 4)
28904 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
28905 else if (out_n == 8 && in_n == 8)
28906 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
28907 }
28908 break;
28909
28910 case BUILT_IN_FLOOR:
28911 /* The round insn does not trap on denormals. */
28912 if (flag_trapping_math || !TARGET_ROUND)
28913 break;
28914
28915 if (out_mode == DFmode && in_mode == DFmode)
28916 {
28917 if (out_n == 2 && in_n == 2)
28918 return ix86_builtins[IX86_BUILTIN_FLOORPD];
28919 else if (out_n == 4 && in_n == 4)
28920 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
28921 }
28922 break;
28923
28924 case BUILT_IN_FLOORF:
28925 /* The round insn does not trap on denormals. */
28926 if (flag_trapping_math || !TARGET_ROUND)
28927 break;
28928
28929 if (out_mode == SFmode && in_mode == SFmode)
28930 {
28931 if (out_n == 4 && in_n == 4)
28932 return ix86_builtins[IX86_BUILTIN_FLOORPS];
28933 else if (out_n == 8 && in_n == 8)
28934 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
28935 }
28936 break;
28937
28938 case BUILT_IN_CEIL:
28939 /* The round insn does not trap on denormals. */
28940 if (flag_trapping_math || !TARGET_ROUND)
28941 break;
28942
28943 if (out_mode == DFmode && in_mode == DFmode)
28944 {
28945 if (out_n == 2 && in_n == 2)
28946 return ix86_builtins[IX86_BUILTIN_CEILPD];
28947 else if (out_n == 4 && in_n == 4)
28948 return ix86_builtins[IX86_BUILTIN_CEILPD256];
28949 }
28950 break;
28951
28952 case BUILT_IN_CEILF:
28953 /* The round insn does not trap on denormals. */
28954 if (flag_trapping_math || !TARGET_ROUND)
28955 break;
28956
28957 if (out_mode == SFmode && in_mode == SFmode)
28958 {
28959 if (out_n == 4 && in_n == 4)
28960 return ix86_builtins[IX86_BUILTIN_CEILPS];
28961 else if (out_n == 8 && in_n == 8)
28962 return ix86_builtins[IX86_BUILTIN_CEILPS256];
28963 }
28964 break;
28965
28966 case BUILT_IN_TRUNC:
28967 /* The round insn does not trap on denormals. */
28968 if (flag_trapping_math || !TARGET_ROUND)
28969 break;
28970
28971 if (out_mode == DFmode && in_mode == DFmode)
28972 {
28973 if (out_n == 2 && in_n == 2)
28974 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
28975 else if (out_n == 4 && in_n == 4)
28976 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
28977 }
28978 break;
28979
28980 case BUILT_IN_TRUNCF:
28981 /* The round insn does not trap on denormals. */
28982 if (flag_trapping_math || !TARGET_ROUND)
28983 break;
28984
28985 if (out_mode == SFmode && in_mode == SFmode)
28986 {
28987 if (out_n == 4 && in_n == 4)
28988 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
28989 else if (out_n == 8 && in_n == 8)
28990 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
28991 }
28992 break;
28993
28994 case BUILT_IN_RINT:
28995 /* The round insn does not trap on denormals. */
28996 if (flag_trapping_math || !TARGET_ROUND)
28997 break;
28998
28999 if (out_mode == DFmode && in_mode == DFmode)
29000 {
29001 if (out_n == 2 && in_n == 2)
29002 return ix86_builtins[IX86_BUILTIN_RINTPD];
29003 else if (out_n == 4 && in_n == 4)
29004 return ix86_builtins[IX86_BUILTIN_RINTPD256];
29005 }
29006 break;
29007
29008 case BUILT_IN_RINTF:
29009 /* The round insn does not trap on denormals. */
29010 if (flag_trapping_math || !TARGET_ROUND)
29011 break;
29012
29013 if (out_mode == SFmode && in_mode == SFmode)
29014 {
29015 if (out_n == 4 && in_n == 4)
29016 return ix86_builtins[IX86_BUILTIN_RINTPS];
29017 else if (out_n == 8 && in_n == 8)
29018 return ix86_builtins[IX86_BUILTIN_RINTPS256];
29019 }
29020 break;
29021
29022 case BUILT_IN_ROUND:
29023 /* The round insn does not trap on denormals. */
29024 if (flag_trapping_math || !TARGET_ROUND)
29025 break;
29026
29027 if (out_mode == DFmode && in_mode == DFmode)
29028 {
29029 if (out_n == 2 && in_n == 2)
29030 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
29031 else if (out_n == 4 && in_n == 4)
29032 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
29033 }
29034 break;
29035
29036 case BUILT_IN_ROUNDF:
29037 /* The round insn does not trap on denormals. */
29038 if (flag_trapping_math || !TARGET_ROUND)
29039 break;
29040
29041 if (out_mode == SFmode && in_mode == SFmode)
29042 {
29043 if (out_n == 4 && in_n == 4)
29044 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
29045 else if (out_n == 8 && in_n == 8)
29046 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
29047 }
29048 break;
29049
29050 case BUILT_IN_FMA:
29051 if (out_mode == DFmode && in_mode == DFmode)
29052 {
29053 if (out_n == 2 && in_n == 2)
29054 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
29055 if (out_n == 4 && in_n == 4)
29056 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
29057 }
29058 break;
29059
29060 case BUILT_IN_FMAF:
29061 if (out_mode == SFmode && in_mode == SFmode)
29062 {
29063 if (out_n == 4 && in_n == 4)
29064 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
29065 if (out_n == 8 && in_n == 8)
29066 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
29067 }
29068 break;
29069
29070 default:
29071 break;
29072 }
29073
29074 /* Dispatch to a handler for a vectorization library. */
29075 if (ix86_veclib_handler)
29076 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
29077 type_in);
29078
29079 return NULL_TREE;
29080 }
29081
29082 /* Handler for an SVML-style interface to
29083 a library with vectorized intrinsics. */
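/* A worked example of the name mangling implemented below (illustrative
   only): BUILT_IN_SINF vectorized over 4 floats becomes "vmlsSin4" and
   BUILT_IN_SIN over 2 doubles becomes "vmldSin2"; the log functions are
   special-cased to "vmlsLn4" / "vmldLn2". */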
29084
29085 static tree
29086 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
29087 {
29088 char name[20];
29089 tree fntype, new_fndecl, args;
29090 unsigned arity;
29091 const char *bname;
29092 enum machine_mode el_mode, in_mode;
29093 int n, in_n;
29094
29095 /* The SVML is suitable for unsafe math only. */
29096 if (!flag_unsafe_math_optimizations)
29097 return NULL_TREE;
29098
29099 el_mode = TYPE_MODE (TREE_TYPE (type_out));
29100 n = TYPE_VECTOR_SUBPARTS (type_out);
29101 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29102 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29103 if (el_mode != in_mode
29104 || n != in_n)
29105 return NULL_TREE;
29106
29107 switch (fn)
29108 {
29109 case BUILT_IN_EXP:
29110 case BUILT_IN_LOG:
29111 case BUILT_IN_LOG10:
29112 case BUILT_IN_POW:
29113 case BUILT_IN_TANH:
29114 case BUILT_IN_TAN:
29115 case BUILT_IN_ATAN:
29116 case BUILT_IN_ATAN2:
29117 case BUILT_IN_ATANH:
29118 case BUILT_IN_CBRT:
29119 case BUILT_IN_SINH:
29120 case BUILT_IN_SIN:
29121 case BUILT_IN_ASINH:
29122 case BUILT_IN_ASIN:
29123 case BUILT_IN_COSH:
29124 case BUILT_IN_COS:
29125 case BUILT_IN_ACOSH:
29126 case BUILT_IN_ACOS:
29127 if (el_mode != DFmode || n != 2)
29128 return NULL_TREE;
29129 break;
29130
29131 case BUILT_IN_EXPF:
29132 case BUILT_IN_LOGF:
29133 case BUILT_IN_LOG10F:
29134 case BUILT_IN_POWF:
29135 case BUILT_IN_TANHF:
29136 case BUILT_IN_TANF:
29137 case BUILT_IN_ATANF:
29138 case BUILT_IN_ATAN2F:
29139 case BUILT_IN_ATANHF:
29140 case BUILT_IN_CBRTF:
29141 case BUILT_IN_SINHF:
29142 case BUILT_IN_SINF:
29143 case BUILT_IN_ASINHF:
29144 case BUILT_IN_ASINF:
29145 case BUILT_IN_COSHF:
29146 case BUILT_IN_COSF:
29147 case BUILT_IN_ACOSHF:
29148 case BUILT_IN_ACOSF:
29149 if (el_mode != SFmode || n != 4)
29150 return NULL_TREE;
29151 break;
29152
29153 default:
29154 return NULL_TREE;
29155 }
29156
29157 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
29158
29159 if (fn == BUILT_IN_LOGF)
29160 strcpy (name, "vmlsLn4");
29161 else if (fn == BUILT_IN_LOG)
29162 strcpy (name, "vmldLn2");
29163 else if (n == 4)
29164 {
29165 sprintf (name, "vmls%s", bname+10);
29166 name[strlen (name)-1] = '4';
29167 }
29168 else
29169 sprintf (name, "vmld%s2", bname+10);
29170
29171 /* Convert the first letter of the function name to uppercase. */
29172 name[4] &= ~0x20;
29173
29174 arity = 0;
29175 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
29176 args = TREE_CHAIN (args))
29177 arity++;
29178
29179 if (arity == 1)
29180 fntype = build_function_type_list (type_out, type_in, NULL);
29181 else
29182 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
29183
29184 /* Build a function declaration for the vectorized function. */
29185 new_fndecl = build_decl (BUILTINS_LOCATION,
29186 FUNCTION_DECL, get_identifier (name), fntype);
29187 TREE_PUBLIC (new_fndecl) = 1;
29188 DECL_EXTERNAL (new_fndecl) = 1;
29189 DECL_IS_NOVOPS (new_fndecl) = 1;
29190 TREE_READONLY (new_fndecl) = 1;
29191
29192 return new_fndecl;
29193 }
29194
29195 /* Handler for an ACML-style interface to
29196 a library with vectorized intrinsics. */
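/* A worked example of the name construction below (illustrative only):
   BUILT_IN_SIN over 2 doubles yields "__vrd2_sin" and BUILT_IN_SINF over
   4 floats yields "__vrs4_sinf". */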
29197
29198 static tree
29199 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
29200 {
29201 char name[20] = "__vr.._";
29202 tree fntype, new_fndecl, args;
29203 unsigned arity;
29204 const char *bname;
29205 enum machine_mode el_mode, in_mode;
29206 int n, in_n;
29207
29208 /* The ACML is 64-bit only and suitable for unsafe math only, as
29209 it does not correctly support parts of IEEE with the required
29210 precision, such as denormals. */
29211 if (!TARGET_64BIT
29212 || !flag_unsafe_math_optimizations)
29213 return NULL_TREE;
29214
29215 el_mode = TYPE_MODE (TREE_TYPE (type_out));
29216 n = TYPE_VECTOR_SUBPARTS (type_out);
29217 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29218 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29219 if (el_mode != in_mode
29220 || n != in_n)
29221 return NULL_TREE;
29222
29223 switch (fn)
29224 {
29225 case BUILT_IN_SIN:
29226 case BUILT_IN_COS:
29227 case BUILT_IN_EXP:
29228 case BUILT_IN_LOG:
29229 case BUILT_IN_LOG2:
29230 case BUILT_IN_LOG10:
29231 name[4] = 'd';
29232 name[5] = '2';
29233 if (el_mode != DFmode
29234 || n != 2)
29235 return NULL_TREE;
29236 break;
29237
29238 case BUILT_IN_SINF:
29239 case BUILT_IN_COSF:
29240 case BUILT_IN_EXPF:
29241 case BUILT_IN_POWF:
29242 case BUILT_IN_LOGF:
29243 case BUILT_IN_LOG2F:
29244 case BUILT_IN_LOG10F:
29245 name[4] = 's';
29246 name[5] = '4';
29247 if (el_mode != SFmode
29248 || n != 4)
29249 return NULL_TREE;
29250 break;
29251
29252 default:
29253 return NULL_TREE;
29254 }
29255
29256 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
29257 sprintf (name + 7, "%s", bname+10);
29258
29259 arity = 0;
29260 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
29261 args = TREE_CHAIN (args))
29262 arity++;
29263
29264 if (arity == 1)
29265 fntype = build_function_type_list (type_out, type_in, NULL);
29266 else
29267 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
29268
29269 /* Build a function declaration for the vectorized function. */
29270 new_fndecl = build_decl (BUILTINS_LOCATION,
29271 FUNCTION_DECL, get_identifier (name), fntype);
29272 TREE_PUBLIC (new_fndecl) = 1;
29273 DECL_EXTERNAL (new_fndecl) = 1;
29274 DECL_IS_NOVOPS (new_fndecl) = 1;
29275 TREE_READONLY (new_fndecl) = 1;
29276
29277 return new_fndecl;
29278 }
29279
29280
29281 /* Returns a decl of a function that implements conversion of an integer vector
29282 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
29283 are the types involved when converting according to CODE.
29284 Return NULL_TREE if it is not available. */
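/* For example (derived from the cases below): a FLOAT_EXPR from V4SImode
   to V4SFmode yields IX86_BUILTIN_CVTDQ2PS, or IX86_BUILTIN_CVTUDQ2PS when
   the source type is unsigned. */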
29285
29286 static tree
29287 ix86_vectorize_builtin_conversion (unsigned int code,
29288 tree dest_type, tree src_type)
29289 {
29290 if (! TARGET_SSE2)
29291 return NULL_TREE;
29292
29293 switch (code)
29294 {
29295 case FLOAT_EXPR:
29296 switch (TYPE_MODE (src_type))
29297 {
29298 case V4SImode:
29299 switch (TYPE_MODE (dest_type))
29300 {
29301 case V4SFmode:
29302 return (TYPE_UNSIGNED (src_type)
29303 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
29304 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
29305 case V4DFmode:
29306 return (TYPE_UNSIGNED (src_type)
29307 ? NULL_TREE
29308 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
29309 default:
29310 return NULL_TREE;
29311 }
29312 break;
29313 case V8SImode:
29314 switch (TYPE_MODE (dest_type))
29315 {
29316 case V8SFmode:
29317 return (TYPE_UNSIGNED (src_type)
29318 ? NULL_TREE
29319 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
29320 default:
29321 return NULL_TREE;
29322 }
29323 break;
29324 default:
29325 return NULL_TREE;
29326 }
29327
29328 case FIX_TRUNC_EXPR:
29329 switch (TYPE_MODE (dest_type))
29330 {
29331 case V4SImode:
29332 switch (TYPE_MODE (src_type))
29333 {
29334 case V4SFmode:
29335 return (TYPE_UNSIGNED (dest_type)
29336 ? NULL_TREE
29337 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
29338 case V4DFmode:
29339 return (TYPE_UNSIGNED (dest_type)
29340 ? NULL_TREE
29341 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
29342 default:
29343 return NULL_TREE;
29344 }
29345 break;
29346
29347 case V8SImode:
29348 switch (TYPE_MODE (src_type))
29349 {
29350 case V8SFmode:
29351 return (TYPE_UNSIGNED (dest_type)
29352 ? NULL_TREE
29353 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
29354 default:
29355 return NULL_TREE;
29356 }
29357 break;
29358
29359 default:
29360 return NULL_TREE;
29361 }
29362
29363 default:
29364 return NULL_TREE;
29365 }
29366
29367 return NULL_TREE;
29368 }
29369
29370 /* Returns a code for a target-specific builtin that implements
29371 reciprocal of the function, or NULL_TREE if not available. */
29372
29373 static tree
29374 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
29375 bool sqrt ATTRIBUTE_UNUSED)
29376 {
29377 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
29378 && flag_finite_math_only && !flag_trapping_math
29379 && flag_unsafe_math_optimizations))
29380 return NULL_TREE;
29381
29382 if (md_fn)
29383 /* Machine dependent builtins. */
29384 switch (fn)
29385 {
29386 /* Vectorized version of sqrt to rsqrt conversion. */
29387 case IX86_BUILTIN_SQRTPS_NR:
29388 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
29389
29390 case IX86_BUILTIN_SQRTPS_NR256:
29391 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
29392
29393 default:
29394 return NULL_TREE;
29395 }
29396 else
29397 /* Normal builtins. */
29398 switch (fn)
29399 {
29400 /* Sqrt to rsqrt conversion. */
29401 case BUILT_IN_SQRTF:
29402 return ix86_builtins[IX86_BUILTIN_RSQRTF];
29403
29404 default:
29405 return NULL_TREE;
29406 }
29407 }
29408 \f
29409 /* Helper for avx_vpermilps256_operand et al. This is also used by
29410 the expansion functions to turn the parallel back into a mask.
29411 The return value is 0 for no match and the imm8+1 for a match. */
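/* A worked example (illustrative only): for V4SFmode each element takes
   two bits of the mask, so (parallel [3 2 1 0]) gives
     mask = 3 | (2 << 2) | (1 << 4) | (0 << 6) = 0x1b
   and the function returns 0x1b + 1 = 0x1c. */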
29412
29413 int
29414 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
29415 {
29416 unsigned i, nelt = GET_MODE_NUNITS (mode);
29417 unsigned mask = 0;
29418 unsigned char ipar[8];
29419
29420 if (XVECLEN (par, 0) != (int) nelt)
29421 return 0;
29422
29423 /* Validate that all of the elements are constants, and not totally
29424 out of range. Copy the data into an integral array to make the
29425 subsequent checks easier. */
29426 for (i = 0; i < nelt; ++i)
29427 {
29428 rtx er = XVECEXP (par, 0, i);
29429 unsigned HOST_WIDE_INT ei;
29430
29431 if (!CONST_INT_P (er))
29432 return 0;
29433 ei = INTVAL (er);
29434 if (ei >= nelt)
29435 return 0;
29436 ipar[i] = ei;
29437 }
29438
29439 switch (mode)
29440 {
29441 case V4DFmode:
29442 /* In the 256-bit DFmode case, we can only move elements within
29443 a 128-bit lane. */
29444 for (i = 0; i < 2; ++i)
29445 {
29446 if (ipar[i] >= 2)
29447 return 0;
29448 mask |= ipar[i] << i;
29449 }
29450 for (i = 2; i < 4; ++i)
29451 {
29452 if (ipar[i] < 2)
29453 return 0;
29454 mask |= (ipar[i] - 2) << i;
29455 }
29456 break;
29457
29458 case V8SFmode:
29459 /* In the 256-bit SFmode case, we have full freedom of movement
29460 within the low 128-bit lane, but the high 128-bit lane must
29461 mirror the exact same pattern. */
29462 for (i = 0; i < 4; ++i)
29463 if (ipar[i] + 4 != ipar[i + 4])
29464 return 0;
29465 nelt = 4;
29466 /* FALLTHRU */
29467
29468 case V2DFmode:
29469 case V4SFmode:
29470 /* In the 128-bit case, we have full freedom in the placement of
29471 the elements from the source operand. */
29472 for (i = 0; i < nelt; ++i)
29473 mask |= ipar[i] << (i * (nelt / 2));
29474 break;
29475
29476 default:
29477 gcc_unreachable ();
29478 }
29479
29480 /* Make sure success has a non-zero value by adding one. */
29481 return mask + 1;
29482 }
29483
29484 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
29485 the expansion functions to turn the parallel back into a mask.
29486 The return value is 0 for no match, and imm8 + 1 for a match. */
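/* For example, in V8SFmode the parallel [4 5 6 7 0 1 2 3] selects the high
   128-bit half of the first source for the low half of the result and the
   low half for the high half of the result: each half is consecutive and
   starts on a multiple of nelt2, so e is 1 for the low half and 0 for the
   high half, giving mask 0x01 and a return value of 2.  */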
29487
29488 int
29489 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
29490 {
29491 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
29492 unsigned mask = 0;
29493 unsigned char ipar[8];
29494
29495 if (XVECLEN (par, 0) != (int) nelt)
29496 return 0;
29497
29498 /* Validate that all of the elements are constants, and not totally
29499 out of range. Copy the data into an integral array to make the
29500 subsequent checks easier. */
29501 for (i = 0; i < nelt; ++i)
29502 {
29503 rtx er = XVECEXP (par, 0, i);
29504 unsigned HOST_WIDE_INT ei;
29505
29506 if (!CONST_INT_P (er))
29507 return 0;
29508 ei = INTVAL (er);
29509 if (ei >= 2 * nelt)
29510 return 0;
29511 ipar[i] = ei;
29512 }
29513
29514 /* Validate that each half of the permute selects consecutive elements. */
29515 for (i = 0; i < nelt2 - 1; ++i)
29516 if (ipar[i] + 1 != ipar[i + 1])
29517 return 0;
29518 for (i = nelt2; i < nelt - 1; ++i)
29519 if (ipar[i] + 1 != ipar[i + 1])
29520 return 0;
29521
29522 /* Reconstruct the mask. */
29523 for (i = 0; i < 2; ++i)
29524 {
29525 unsigned e = ipar[i * nelt2];
29526 if (e % nelt2)
29527 return 0;
29528 e /= nelt2;
29529 mask |= e << (i * 4);
29530 }
29531
29532 /* Make sure success has a non-zero value by adding one. */
29533 return mask + 1;
29534 }
29535 \f
29536
29537 /* Store OPERAND to memory after reload has completed. This means
29538 that we can't easily use assign_stack_local. */
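/* Roughly: with a red zone the operand is simply stored below the stack
   pointer; otherwise it is pushed with a PRE_DEC store (widening HImode and
   SImode to DImode on 64-bit targets, and splitting DImode into two SImode
   pushes on 32-bit targets), and the returned MEM refers to the new top of
   the stack in MODE.  */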
29539 rtx
29540 ix86_force_to_memory (enum machine_mode mode, rtx operand)
29541 {
29542 rtx result;
29543
29544 gcc_assert (reload_completed);
29545 if (ix86_using_red_zone ())
29546 {
29547 result = gen_rtx_MEM (mode,
29548 gen_rtx_PLUS (Pmode,
29549 stack_pointer_rtx,
29550 GEN_INT (-RED_ZONE_SIZE)));
29551 emit_move_insn (result, operand);
29552 }
29553 else if (TARGET_64BIT)
29554 {
29555 switch (mode)
29556 {
29557 case HImode:
29558 case SImode:
29559 operand = gen_lowpart (DImode, operand);
29560 /* FALLTHRU */
29561 case DImode:
29562 emit_insn (
29563 gen_rtx_SET (VOIDmode,
29564 gen_rtx_MEM (DImode,
29565 gen_rtx_PRE_DEC (DImode,
29566 stack_pointer_rtx)),
29567 operand));
29568 break;
29569 default:
29570 gcc_unreachable ();
29571 }
29572 result = gen_rtx_MEM (mode, stack_pointer_rtx);
29573 }
29574 else
29575 {
29576 switch (mode)
29577 {
29578 case DImode:
29579 {
29580 rtx operands[2];
29581 split_double_mode (mode, &operand, 1, operands, operands + 1);
29582 emit_insn (
29583 gen_rtx_SET (VOIDmode,
29584 gen_rtx_MEM (SImode,
29585 gen_rtx_PRE_DEC (Pmode,
29586 stack_pointer_rtx)),
29587 operands[1]));
29588 emit_insn (
29589 gen_rtx_SET (VOIDmode,
29590 gen_rtx_MEM (SImode,
29591 gen_rtx_PRE_DEC (Pmode,
29592 stack_pointer_rtx)),
29593 operands[0]));
29594 }
29595 break;
29596 case HImode:
29597 /* Store HImodes as SImodes. */
29598 operand = gen_lowpart (SImode, operand);
29599 /* FALLTHRU */
29600 case SImode:
29601 emit_insn (
29602 gen_rtx_SET (VOIDmode,
29603 gen_rtx_MEM (GET_MODE (operand),
29604 gen_rtx_PRE_DEC (SImode,
29605 stack_pointer_rtx)),
29606 operand));
29607 break;
29608 default:
29609 gcc_unreachable ();
29610 }
29611 result = gen_rtx_MEM (mode, stack_pointer_rtx);
29612 }
29613 return result;
29614 }
29615
29616 /* Free operand from the memory. */
29617 void
29618 ix86_free_from_memory (enum machine_mode mode)
29619 {
29620 if (!ix86_using_red_zone ())
29621 {
29622 int size;
29623
29624 if (mode == DImode || TARGET_64BIT)
29625 size = 8;
29626 else
29627 size = 4;
29628 /* Use LEA to deallocate stack space. In peephole2 it will be converted
29629 to a pop or add instruction if registers are available. */
29630 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
29631 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
29632 GEN_INT (size))));
29633 }
29634 }
29635
29636 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
29637
29638 Put float CONST_DOUBLE in the constant pool instead of fp regs.
29639 QImode must go into class Q_REGS.
29640 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
29641 movdf to do mem-to-mem moves through integer regs. */
29642
29643 static reg_class_t
29644 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
29645 {
29646 enum machine_mode mode = GET_MODE (x);
29647
29648 /* We're only allowed to return a subclass of CLASS. Many of the
29649 following checks fail for NO_REGS, so eliminate that early. */
29650 if (regclass == NO_REGS)
29651 return NO_REGS;
29652
29653 /* All classes can load zeros. */
29654 if (x == CONST0_RTX (mode))
29655 return regclass;
29656
29657 /* Force constants into memory if we are loading a (nonzero) constant into
29658 an MMX or SSE register. This is because there are no MMX/SSE instructions
29659 to load from a constant. */
29660 if (CONSTANT_P (x)
29661 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
29662 return NO_REGS;
29663
29664 /* Prefer SSE regs only, if we can use them for math. */
29665 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
29666 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
29667
29668 /* Floating-point constants need more complex checks. */
29669 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
29670 {
29671 /* General regs can load everything. */
29672 if (reg_class_subset_p (regclass, GENERAL_REGS))
29673 return regclass;
29674
29675 /* Floats can load 0 and 1 plus some others. Note that we eliminated
29676 zero above. We only want to wind up preferring 80387 registers if
29677 we plan on doing computation with them. */
29678 if (TARGET_80387
29679 && standard_80387_constant_p (x) > 0)
29680 {
29681 /* Limit class to non-sse. */
29682 if (regclass == FLOAT_SSE_REGS)
29683 return FLOAT_REGS;
29684 if (regclass == FP_TOP_SSE_REGS)
29685 return FP_TOP_REG;
29686 if (regclass == FP_SECOND_SSE_REGS)
29687 return FP_SECOND_REG;
29688 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
29689 return regclass;
29690 }
29691
29692 return NO_REGS;
29693 }
29694
29695 /* Generally when we see PLUS here, it's the function invariant
29696 (plus soft-fp const_int), which can only be computed into general
29697 regs. */
29698 if (GET_CODE (x) == PLUS)
29699 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
29700
29701 /* QImode constants are easy to load, but non-constant QImode data
29702 must go into Q_REGS. */
29703 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
29704 {
29705 if (reg_class_subset_p (regclass, Q_REGS))
29706 return regclass;
29707 if (reg_class_subset_p (Q_REGS, regclass))
29708 return Q_REGS;
29709 return NO_REGS;
29710 }
29711
29712 return regclass;
29713 }
29714
29715 /* Discourage putting floating-point values in SSE registers unless
29716 SSE math is being used, and likewise for the 387 registers. */
29717 static reg_class_t
29718 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
29719 {
29720 enum machine_mode mode = GET_MODE (x);
29721
29722 /* Restrict the output reload class to the register bank that we are doing
29723 math on. If we would like not to return a subset of CLASS, reject this
29724 alternative: if reload cannot do this, it will still use its choice. */
29726 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
29727 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
29728
29729 if (X87_FLOAT_MODE_P (mode))
29730 {
29731 if (regclass == FP_TOP_SSE_REGS)
29732 return FP_TOP_REG;
29733 else if (regclass == FP_SECOND_SSE_REGS)
29734 return FP_SECOND_REG;
29735 else
29736 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
29737 }
29738
29739 return regclass;
29740 }
29741
29742 static reg_class_t
29743 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
29744 enum machine_mode mode, secondary_reload_info *sri)
29745 {
29746 /* Double-word spills from general registers to non-offsettable memory
29747 references (zero-extended addresses) require special handling. */
29748 if (TARGET_64BIT
29749 && MEM_P (x)
29750 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
29751 && rclass == GENERAL_REGS
29752 && !offsettable_memref_p (x))
29753 {
29754 sri->icode = (in_p
29755 ? CODE_FOR_reload_noff_load
29756 : CODE_FOR_reload_noff_store);
29757 /* Add the cost of moving address to a temporary. */
29758 sri->extra_cost = 1;
29759
29760 return NO_REGS;
29761 }
29762
29763 /* QImode spills from non-QI registers require an
29764 intermediate register on 32-bit targets. */
29765 if (!TARGET_64BIT
29766 && !in_p && mode == QImode
29767 && (rclass == GENERAL_REGS
29768 || rclass == LEGACY_REGS
29769 || rclass == INDEX_REGS))
29770 {
29771 int regno;
29772
29773 if (REG_P (x))
29774 regno = REGNO (x);
29775 else
29776 regno = -1;
29777
29778 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
29779 regno = true_regnum (x);
29780
29781 /* Return Q_REGS if the operand is in memory. */
29782 if (regno == -1)
29783 return Q_REGS;
29784 }
29785
29786 /* This condition handles the corner case where an expression involving
29787 pointers gets vectorized. We're trying to use the address of a
29788 stack slot as a vector initializer.
29789
29790 (set (reg:V2DI 74 [ vect_cst_.2 ])
29791 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
29792
29793 Eventually frame gets turned into sp+offset like this:
29794
29795 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29796 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
29797 (const_int 392 [0x188]))))
29798
29799 That later gets turned into:
29800
29801 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29802 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
29803 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
29804
29805 We'll have the following reload recorded:
29806
29807 Reload 0: reload_in (DI) =
29808 (plus:DI (reg/f:DI 7 sp)
29809 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
29810 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29811 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
29812 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
29813 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29814 reload_reg_rtx: (reg:V2DI 22 xmm1)
29815
29816 This isn't going to work since SSE instructions can't handle scalar
29817 additions. Returning GENERAL_REGS forces the addition into an integer
29818 register, and reload can handle subsequent reloads without problems. */
29819
29820 if (in_p && GET_CODE (x) == PLUS
29821 && SSE_CLASS_P (rclass)
29822 && SCALAR_INT_MODE_P (mode))
29823 return GENERAL_REGS;
29824
29825 return NO_REGS;
29826 }
29827
29828 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
29829
29830 static bool
29831 ix86_class_likely_spilled_p (reg_class_t rclass)
29832 {
29833 switch (rclass)
29834 {
29835 case AREG:
29836 case DREG:
29837 case CREG:
29838 case BREG:
29839 case AD_REGS:
29840 case SIREG:
29841 case DIREG:
29842 case SSE_FIRST_REG:
29843 case FP_TOP_REG:
29844 case FP_SECOND_REG:
29845 return true;
29846
29847 default:
29848 break;
29849 }
29850
29851 return false;
29852 }
29853
29854 /* If we are copying between general and FP registers, we need a memory
29855 location. The same is true for SSE and MMX registers.
29856
29857 To optimize register_move_cost performance, allow inline variant.
29858
29859 The macro can't work reliably when one of the CLASSES is a class containing
29860 registers from multiple units (SSE, MMX, integer). We avoid this by never
29861 combining those units in a single alternative in the machine description.
29862 Ensure that this constraint holds to avoid unexpected surprises.
29863
29864 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
29865 enforce these sanity checks. */
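/* For example, assuming SSE2 and inter-unit moves are enabled, a DImode
   copy between GENERAL_REGS and SSE_REGS on a 32-bit target still needs
   secondary memory, since direct SSE<->integer moves are limited to word
   size by the check below.  */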
29866
29867 static inline bool
29868 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
29869 enum machine_mode mode, int strict)
29870 {
29871 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
29872 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
29873 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
29874 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
29875 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
29876 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
29877 {
29878 gcc_assert (!strict);
29879 return true;
29880 }
29881
29882 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
29883 return true;
29884
29885 /* ??? This is a lie. We do have moves between mmx/general and between
29886 mmx/sse2. But by saying we need secondary memory we discourage the
29887 register allocator from using the mmx registers unless needed. */
29888 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
29889 return true;
29890
29891 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
29892 {
29893 /* SSE1 doesn't have any direct moves from other classes. */
29894 if (!TARGET_SSE2)
29895 return true;
29896
29897 /* If the target says that inter-unit moves are more expensive
29898 than moving through memory, then don't generate them. */
29899 if (!TARGET_INTER_UNIT_MOVES)
29900 return true;
29901
29902 /* Between SSE and general, we have moves no larger than word size. */
29903 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
29904 return true;
29905 }
29906
29907 return false;
29908 }
29909
29910 bool
29911 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
29912 enum machine_mode mode, int strict)
29913 {
29914 return inline_secondary_memory_needed (class1, class2, mode, strict);
29915 }
29916
29917 /* Implement the TARGET_CLASS_MAX_NREGS hook.
29918
29919 On the 80386, this is the size of MODE in words,
29920 except in the FP regs, where a single reg is always enough. */
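/* For example, for GENERAL_REGS an XFmode value needs 3 registers on
   32-bit targets and 2 on 64-bit targets, and a DImode value needs 2 and 1
   respectively; for a pure floating-point or SSE class a scalar mode needs
   a single register and a complex mode needs two.  */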
29921
29922 static unsigned char
29923 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
29924 {
29925 if (MAYBE_INTEGER_CLASS_P (rclass))
29926 {
29927 if (mode == XFmode)
29928 return (TARGET_64BIT ? 2 : 3);
29929 else if (mode == XCmode)
29930 return (TARGET_64BIT ? 4 : 6);
29931 else
29932 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
29933 }
29934 else
29935 {
29936 if (COMPLEX_MODE_P (mode))
29937 return 2;
29938 else
29939 return 1;
29940 }
29941 }
29942
29943 /* Return true if the registers in CLASS cannot represent the change from
29944 modes FROM to TO. */
29945
29946 bool
29947 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
29948 enum reg_class regclass)
29949 {
29950 if (from == to)
29951 return false;
29952
29953 /* x87 registers can't do subreg at all, as all values are reformatted
29954 to extended precision. */
29955 if (MAYBE_FLOAT_CLASS_P (regclass))
29956 return true;
29957
29958 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
29959 {
29960 /* Vector registers do not support QI or HImode loads. If we don't
29961 disallow a change to these modes, reload will assume it's ok to
29962 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
29963 the vec_dupv4hi pattern. */
29964 if (GET_MODE_SIZE (from) < 4)
29965 return true;
29966
29967 /* Vector registers do not support subreg with nonzero offsets, which
29968 are otherwise valid for integer registers. Since we can't see
29969 whether we have a nonzero offset from here, prohibit all
29970 nonparadoxical subregs changing size. */
29971 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
29972 return true;
29973 }
29974
29975 return false;
29976 }
29977
29978 /* Return the cost of moving data of mode M between a
29979 register and memory. A value of 2 is the default; this cost is
29980 relative to those in `REGISTER_MOVE_COST'.
29981
29982 This function is used extensively by register_move_cost, which is used to
29983 build tables at startup, so make it inline in this case.
29984 When IN is 2, return the maximum of the in and out move costs.
29985
29986 If moving between registers and memory is more expensive than
29987 between two registers, you should define this macro to express the
29988 relative cost.
29989
29990 Also model the increased cost of moving QImode registers in non-Q_REGS
29991 classes.
29992 */
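/* For example, storing a QImode value held in a non-Q register on a 32-bit
   target is costed as int_store[0] + 4, loading it costs movzbl_load, and
   with IN == 2 the larger of the two is returned.  */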
29993 static inline int
29994 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
29995 int in)
29996 {
29997 int cost;
29998 if (FLOAT_CLASS_P (regclass))
29999 {
30000 int index;
30001 switch (mode)
30002 {
30003 case SFmode:
30004 index = 0;
30005 break;
30006 case DFmode:
30007 index = 1;
30008 break;
30009 case XFmode:
30010 index = 2;
30011 break;
30012 default:
30013 return 100;
30014 }
30015 if (in == 2)
30016 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
30017 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
30018 }
30019 if (SSE_CLASS_P (regclass))
30020 {
30021 int index;
30022 switch (GET_MODE_SIZE (mode))
30023 {
30024 case 4:
30025 index = 0;
30026 break;
30027 case 8:
30028 index = 1;
30029 break;
30030 case 16:
30031 index = 2;
30032 break;
30033 default:
30034 return 100;
30035 }
30036 if (in == 2)
30037 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
30038 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
30039 }
30040 if (MMX_CLASS_P (regclass))
30041 {
30042 int index;
30043 switch (GET_MODE_SIZE (mode))
30044 {
30045 case 4:
30046 index = 0;
30047 break;
30048 case 8:
30049 index = 1;
30050 break;
30051 default:
30052 return 100;
30053 }
30054 if (in == 2)
30055 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
30056 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
30057 }
30058 switch (GET_MODE_SIZE (mode))
30059 {
30060 case 1:
30061 if (Q_CLASS_P (regclass) || TARGET_64BIT)
30062 {
30063 if (!in)
30064 return ix86_cost->int_store[0];
30065 if (TARGET_PARTIAL_REG_DEPENDENCY
30066 && optimize_function_for_speed_p (cfun))
30067 cost = ix86_cost->movzbl_load;
30068 else
30069 cost = ix86_cost->int_load[0];
30070 if (in == 2)
30071 return MAX (cost, ix86_cost->int_store[0]);
30072 return cost;
30073 }
30074 else
30075 {
30076 if (in == 2)
30077 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
30078 if (in)
30079 return ix86_cost->movzbl_load;
30080 else
30081 return ix86_cost->int_store[0] + 4;
30082 }
30083 break;
30084 case 2:
30085 if (in == 2)
30086 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
30087 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
30088 default:
30089 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
30090 if (mode == TFmode)
30091 mode = XFmode;
30092 if (in == 2)
30093 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
30094 else if (in)
30095 cost = ix86_cost->int_load[2];
30096 else
30097 cost = ix86_cost->int_store[2];
30098 return (cost * (((int) GET_MODE_SIZE (mode)
30099 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
30100 }
30101 }
30102
30103 static int
30104 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
30105 bool in)
30106 {
30107 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
30108 }
30109
30110
30111 /* Return the cost of moving data from a register in class CLASS1 to
30112 one in class CLASS2.
30113
30114 It is not required that the cost always equal 2 when FROM is the same as TO;
30115 on some machines it is expensive to move between registers if they are not
30116 general registers. */
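/* For example, when a DFmode move between FLOAT_REGS and SSE_REGS needs
   secondary memory, the cost below becomes 1 plus the worst-case (in or
   out) memory move cost of each class, which makes the allocator strongly
   prefer keeping such values within a single register file.  */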
30117
30118 static int
30119 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
30120 reg_class_t class2_i)
30121 {
30122 enum reg_class class1 = (enum reg_class) class1_i;
30123 enum reg_class class2 = (enum reg_class) class2_i;
30124
30125 /* In case we require secondary memory, compute cost of the store followed
30126 by load. In order to avoid bad register allocation choices, we need
30127 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
30128
30129 if (inline_secondary_memory_needed (class1, class2, mode, 0))
30130 {
30131 int cost = 1;
30132
30133 cost += inline_memory_move_cost (mode, class1, 2);
30134 cost += inline_memory_move_cost (mode, class2, 2);
30135
30136 /* When copying from a general purpose register we may emit multiple
30137 stores followed by a single load, causing a memory size mismatch stall.
30138 Count this as an arbitrarily high cost of 20.
30139 if (targetm.class_max_nregs (class1, mode)
30140 > targetm.class_max_nregs (class2, mode))
30141 cost += 20;
30142
30143 /* In the case of FP/MMX moves, the registers actually overlap, and we
30144 have to switch modes in order to treat them differently. */
30145 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
30146 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
30147 cost += 20;
30148
30149 return cost;
30150 }
30151
30152 /* Moves between SSE/MMX and integer unit are expensive. */
30153 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
30154 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
30155
30156 /* ??? By keeping the returned value relatively high, we limit the number
30157 of moves between integer and MMX/SSE registers for all targets.
30158 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
30159 where integer modes in MMX/SSE registers are not tieable
30160 because of missing QImode and HImode moves to, from or between
30161 MMX/SSE registers. */
30162 return MAX (8, ix86_cost->mmxsse_to_integer);
30163
30164 if (MAYBE_FLOAT_CLASS_P (class1))
30165 return ix86_cost->fp_move;
30166 if (MAYBE_SSE_CLASS_P (class1))
30167 return ix86_cost->sse_move;
30168 if (MAYBE_MMX_CLASS_P (class1))
30169 return ix86_cost->mmx_move;
30170 return 2;
30171 }
30172
30173 /* Return TRUE if hard register REGNO can hold a value of machine-mode
30174 MODE. */
30175
30176 bool
30177 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
30178 {
30179 /* Flags and only flags can only hold CCmode values. */
30180 if (CC_REGNO_P (regno))
30181 return GET_MODE_CLASS (mode) == MODE_CC;
30182 if (GET_MODE_CLASS (mode) == MODE_CC
30183 || GET_MODE_CLASS (mode) == MODE_RANDOM
30184 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
30185 return false;
30186 if (FP_REGNO_P (regno))
30187 return VALID_FP_MODE_P (mode);
30188 if (SSE_REGNO_P (regno))
30189 {
30190 /* We implement the move patterns for all vector modes into and
30191 out of SSE registers, even when no operation instructions
30192 are available. OImode move is available only when AVX is
30193 enabled. */
30194 return ((TARGET_AVX && mode == OImode)
30195 || VALID_AVX256_REG_MODE (mode)
30196 || VALID_SSE_REG_MODE (mode)
30197 || VALID_SSE2_REG_MODE (mode)
30198 || VALID_MMX_REG_MODE (mode)
30199 || VALID_MMX_REG_MODE_3DNOW (mode));
30200 }
30201 if (MMX_REGNO_P (regno))
30202 {
30203 /* We implement the move patterns for 3DNOW modes even in MMX mode,
30204 so if the register is available at all, then we can move data of
30205 the given mode into or out of it. */
30206 return (VALID_MMX_REG_MODE (mode)
30207 || VALID_MMX_REG_MODE_3DNOW (mode));
30208 }
30209
30210 if (mode == QImode)
30211 {
30212 /* Take care with QImode values - they can be in non-QI regs,
30213 but then they cause partial register stalls. */
30214 if (regno <= BX_REG || TARGET_64BIT)
30215 return true;
30216 if (!TARGET_PARTIAL_REG_STALL)
30217 return true;
30218 return !can_create_pseudo_p ();
30219 }
30220 /* We handle both integers and floats in the general purpose registers. */
30221 else if (VALID_INT_MODE_P (mode))
30222 return true;
30223 else if (VALID_FP_MODE_P (mode))
30224 return true;
30225 else if (VALID_DFP_MODE_P (mode))
30226 return true;
30227 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
30228 on to use that value in smaller contexts, this can easily force a
30229 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
30230 supporting DImode, allow it. */
30231 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
30232 return true;
30233
30234 return false;
30235 }
30236
30237 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
30238 tieable integer mode. */
30239
30240 static bool
30241 ix86_tieable_integer_mode_p (enum machine_mode mode)
30242 {
30243 switch (mode)
30244 {
30245 case HImode:
30246 case SImode:
30247 return true;
30248
30249 case QImode:
30250 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
30251
30252 case DImode:
30253 return TARGET_64BIT;
30254
30255 default:
30256 return false;
30257 }
30258 }
30259
30260 /* Return true if MODE1 is accessible in a register that can hold MODE2
30261 without copying. That is, all register classes that can hold MODE2
30262 can also hold MODE1. */
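/* For example, SFmode (MODE1) ties with DFmode (MODE2) per the check
   below, while the reverse pair falls through all of the checks and is
   reported as not tieable.  */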
30263
30264 bool
30265 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
30266 {
30267 if (mode1 == mode2)
30268 return true;
30269
30270 if (ix86_tieable_integer_mode_p (mode1)
30271 && ix86_tieable_integer_mode_p (mode2))
30272 return true;
30273
30274 /* MODE2 being XFmode implies fp stack or general regs, which means we
30275 can tie any smaller floating point modes to it. Note that we do not
30276 tie this with TFmode. */
30277 if (mode2 == XFmode)
30278 return mode1 == SFmode || mode1 == DFmode;
30279
30280 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
30281 that we can tie it with SFmode. */
30282 if (mode2 == DFmode)
30283 return mode1 == SFmode;
30284
30285 /* If MODE2 is only appropriate for an SSE register, then tie with
30286 any other mode acceptable to SSE registers. */
30287 if (GET_MODE_SIZE (mode2) == 16
30288 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
30289 return (GET_MODE_SIZE (mode1) == 16
30290 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
30291
30292 /* If MODE2 is appropriate for an MMX register, then tie
30293 with any other mode acceptable to MMX registers. */
30294 if (GET_MODE_SIZE (mode2) == 8
30295 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
30296 return (GET_MODE_SIZE (mode1) == 8
30297 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
30298
30299 return false;
30300 }
30301
30302 /* Compute a (partial) cost for rtx X. Return true if the complete
30303 cost has been computed, and false if subexpressions should be
30304 scanned. In either case, *TOTAL contains the cost result. */
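/* For example, in the PLUS case below an address-like expression such as
   (plus (plus (mult reg 4) reg) (const_int 8)) is recognized as a single
   LEA and costed as cost->lea plus the costs of its operands, rather than
   as a multiply followed by two additions.  */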
30305
30306 static bool
30307 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
30308 bool speed)
30309 {
30310 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
30311 enum machine_mode mode = GET_MODE (x);
30312 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
30313
30314 switch (code)
30315 {
30316 case CONST_INT:
30317 case CONST:
30318 case LABEL_REF:
30319 case SYMBOL_REF:
30320 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
30321 *total = 3;
30322 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
30323 *total = 2;
30324 else if (flag_pic && SYMBOLIC_CONST (x)
30325 && (!TARGET_64BIT
30326 || (GET_CODE (x) != LABEL_REF
30327 && (GET_CODE (x) != SYMBOL_REF
30328 || !SYMBOL_REF_LOCAL_P (x)))))
30329 *total = 1;
30330 else
30331 *total = 0;
30332 return true;
30333
30334 case CONST_DOUBLE:
30335 if (mode == VOIDmode)
30336 *total = 0;
30337 else
30338 switch (standard_80387_constant_p (x))
30339 {
30340 case 1: /* 0.0 */
30341 *total = 1;
30342 break;
30343 default: /* Other constants */
30344 *total = 2;
30345 break;
30346 case 0:
30347 case -1:
30348 /* Start with (MEM (SYMBOL_REF)), since that's where
30349 it'll probably end up. Add a penalty for size. */
30350 *total = (COSTS_N_INSNS (1)
30351 + (flag_pic != 0 && !TARGET_64BIT)
30352 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
30353 break;
30354 }
30355 return true;
30356
30357 case ZERO_EXTEND:
30358 /* The zero extension is often completely free on x86_64, so make
30359 it as cheap as possible. */
30360 if (TARGET_64BIT && mode == DImode
30361 && GET_MODE (XEXP (x, 0)) == SImode)
30362 *total = 1;
30363 else if (TARGET_ZERO_EXTEND_WITH_AND)
30364 *total = cost->add;
30365 else
30366 *total = cost->movzx;
30367 return false;
30368
30369 case SIGN_EXTEND:
30370 *total = cost->movsx;
30371 return false;
30372
30373 case ASHIFT:
30374 if (CONST_INT_P (XEXP (x, 1))
30375 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
30376 {
30377 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
30378 if (value == 1)
30379 {
30380 *total = cost->add;
30381 return false;
30382 }
30383 if ((value == 2 || value == 3)
30384 && cost->lea <= cost->shift_const)
30385 {
30386 *total = cost->lea;
30387 return false;
30388 }
30389 }
30390 /* FALLTHRU */
30391
30392 case ROTATE:
30393 case ASHIFTRT:
30394 case LSHIFTRT:
30395 case ROTATERT:
30396 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
30397 {
30398 if (CONST_INT_P (XEXP (x, 1)))
30399 {
30400 if (INTVAL (XEXP (x, 1)) > 32)
30401 *total = cost->shift_const + COSTS_N_INSNS (2);
30402 else
30403 *total = cost->shift_const * 2;
30404 }
30405 else
30406 {
30407 if (GET_CODE (XEXP (x, 1)) == AND)
30408 *total = cost->shift_var * 2;
30409 else
30410 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
30411 }
30412 }
30413 else
30414 {
30415 if (CONST_INT_P (XEXP (x, 1)))
30416 *total = cost->shift_const;
30417 else
30418 *total = cost->shift_var;
30419 }
30420 return false;
30421
30422 case FMA:
30423 {
30424 rtx sub;
30425
30426 gcc_assert (FLOAT_MODE_P (mode));
30427 gcc_assert (TARGET_FMA || TARGET_FMA4);
30428
30429 /* ??? SSE scalar/vector cost should be used here. */
30430 /* ??? Bald assumption that fma has the same cost as fmul. */
30431 *total = cost->fmul;
30432 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
30433
30434 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
30435 sub = XEXP (x, 0);
30436 if (GET_CODE (sub) == NEG)
30437 sub = XEXP (sub, 0);
30438 *total += rtx_cost (sub, FMA, 0, speed);
30439
30440 sub = XEXP (x, 2);
30441 if (GET_CODE (sub) == NEG)
30442 sub = XEXP (sub, 0);
30443 *total += rtx_cost (sub, FMA, 2, speed);
30444 return true;
30445 }
30446
30447 case MULT:
30448 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30449 {
30450 /* ??? SSE scalar cost should be used here. */
30451 *total = cost->fmul;
30452 return false;
30453 }
30454 else if (X87_FLOAT_MODE_P (mode))
30455 {
30456 *total = cost->fmul;
30457 return false;
30458 }
30459 else if (FLOAT_MODE_P (mode))
30460 {
30461 /* ??? SSE vector cost should be used here. */
30462 *total = cost->fmul;
30463 return false;
30464 }
30465 else
30466 {
30467 rtx op0 = XEXP (x, 0);
30468 rtx op1 = XEXP (x, 1);
30469 int nbits;
30470 if (CONST_INT_P (XEXP (x, 1)))
30471 {
30472 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
30473 for (nbits = 0; value != 0; value &= value - 1)
30474 nbits++;
30475 }
30476 else
30477 /* This is arbitrary. */
30478 nbits = 7;
30479
30480 /* Compute costs correctly for widening multiplication. */
30481 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
30482 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
30483 == GET_MODE_SIZE (mode))
30484 {
30485 int is_mulwiden = 0;
30486 enum machine_mode inner_mode = GET_MODE (op0);
30487
30488 if (GET_CODE (op0) == GET_CODE (op1))
30489 is_mulwiden = 1, op1 = XEXP (op1, 0);
30490 else if (CONST_INT_P (op1))
30491 {
30492 if (GET_CODE (op0) == SIGN_EXTEND)
30493 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
30494 == INTVAL (op1);
30495 else
30496 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
30497 }
30498
30499 if (is_mulwiden)
30500 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
30501 }
30502
30503 *total = (cost->mult_init[MODE_INDEX (mode)]
30504 + nbits * cost->mult_bit
30505 + rtx_cost (op0, outer_code, opno, speed)
30506 + rtx_cost (op1, outer_code, opno, speed));
30507
30508 return true;
30509 }
30510
30511 case DIV:
30512 case UDIV:
30513 case MOD:
30514 case UMOD:
30515 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30516 /* ??? SSE cost should be used here. */
30517 *total = cost->fdiv;
30518 else if (X87_FLOAT_MODE_P (mode))
30519 *total = cost->fdiv;
30520 else if (FLOAT_MODE_P (mode))
30521 /* ??? SSE vector cost should be used here. */
30522 *total = cost->fdiv;
30523 else
30524 *total = cost->divide[MODE_INDEX (mode)];
30525 return false;
30526
30527 case PLUS:
30528 if (GET_MODE_CLASS (mode) == MODE_INT
30529 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
30530 {
30531 if (GET_CODE (XEXP (x, 0)) == PLUS
30532 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
30533 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
30534 && CONSTANT_P (XEXP (x, 1)))
30535 {
30536 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
30537 if (val == 2 || val == 4 || val == 8)
30538 {
30539 *total = cost->lea;
30540 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
30541 outer_code, opno, speed);
30542 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
30543 outer_code, opno, speed);
30544 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30545 return true;
30546 }
30547 }
30548 else if (GET_CODE (XEXP (x, 0)) == MULT
30549 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
30550 {
30551 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
30552 if (val == 2 || val == 4 || val == 8)
30553 {
30554 *total = cost->lea;
30555 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
30556 outer_code, opno, speed);
30557 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30558 return true;
30559 }
30560 }
30561 else if (GET_CODE (XEXP (x, 0)) == PLUS)
30562 {
30563 *total = cost->lea;
30564 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
30565 outer_code, opno, speed);
30566 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
30567 outer_code, opno, speed);
30568 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30569 return true;
30570 }
30571 }
30572 /* FALLTHRU */
30573
30574 case MINUS:
30575 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30576 {
30577 /* ??? SSE cost should be used here. */
30578 *total = cost->fadd;
30579 return false;
30580 }
30581 else if (X87_FLOAT_MODE_P (mode))
30582 {
30583 *total = cost->fadd;
30584 return false;
30585 }
30586 else if (FLOAT_MODE_P (mode))
30587 {
30588 /* ??? SSE vector cost should be used here. */
30589 *total = cost->fadd;
30590 return false;
30591 }
30592 /* FALLTHRU */
30593
30594 case AND:
30595 case IOR:
30596 case XOR:
30597 if (!TARGET_64BIT && mode == DImode)
30598 {
30599 *total = (cost->add * 2
30600 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
30601 << (GET_MODE (XEXP (x, 0)) != DImode))
30602 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
30603 << (GET_MODE (XEXP (x, 1)) != DImode)));
30604 return true;
30605 }
30606 /* FALLTHRU */
30607
30608 case NEG:
30609 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30610 {
30611 /* ??? SSE cost should be used here. */
30612 *total = cost->fchs;
30613 return false;
30614 }
30615 else if (X87_FLOAT_MODE_P (mode))
30616 {
30617 *total = cost->fchs;
30618 return false;
30619 }
30620 else if (FLOAT_MODE_P (mode))
30621 {
30622 /* ??? SSE vector cost should be used here. */
30623 *total = cost->fchs;
30624 return false;
30625 }
30626 /* FALLTHRU */
30627
30628 case NOT:
30629 if (!TARGET_64BIT && mode == DImode)
30630 *total = cost->add * 2;
30631 else
30632 *total = cost->add;
30633 return false;
30634
30635 case COMPARE:
30636 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
30637 && XEXP (XEXP (x, 0), 1) == const1_rtx
30638 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
30639 && XEXP (x, 1) == const0_rtx)
30640 {
30641 /* This kind of construct is implemented using test[bwl].
30642 Treat it as if we had an AND. */
30643 *total = (cost->add
30644 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
30645 + rtx_cost (const1_rtx, outer_code, opno, speed));
30646 return true;
30647 }
30648 return false;
30649
30650 case FLOAT_EXTEND:
30651 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
30652 *total = 0;
30653 return false;
30654
30655 case ABS:
30656 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30657 /* ??? SSE cost should be used here. */
30658 *total = cost->fabs;
30659 else if (X87_FLOAT_MODE_P (mode))
30660 *total = cost->fabs;
30661 else if (FLOAT_MODE_P (mode))
30662 /* ??? SSE vector cost should be used here. */
30663 *total = cost->fabs;
30664 return false;
30665
30666 case SQRT:
30667 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30668 /* ??? SSE cost should be used here. */
30669 *total = cost->fsqrt;
30670 else if (X87_FLOAT_MODE_P (mode))
30671 *total = cost->fsqrt;
30672 else if (FLOAT_MODE_P (mode))
30673 /* ??? SSE vector cost should be used here. */
30674 *total = cost->fsqrt;
30675 return false;
30676
30677 case UNSPEC:
30678 if (XINT (x, 1) == UNSPEC_TP)
30679 *total = 0;
30680 return false;
30681
30682 case VEC_SELECT:
30683 case VEC_CONCAT:
30684 case VEC_MERGE:
30685 case VEC_DUPLICATE:
30686 /* ??? Assume all of these vector manipulation patterns are
30687 recognizable, in which case they all have pretty much the
30688 same cost. */
30689 *total = COSTS_N_INSNS (1);
30690 return true;
30691
30692 default:
30693 return false;
30694 }
30695 }
30696
30697 #if TARGET_MACHO
30698
30699 static int current_machopic_label_num;
30700
30701 /* Given a symbol name and its associated stub, write out the
30702 definition of the stub. */
30703
30704 void
30705 machopic_output_stub (FILE *file, const char *symb, const char *stub)
30706 {
30707 unsigned int length;
30708 char *binder_name, *symbol_name, lazy_ptr_name[32];
30709 int label = ++current_machopic_label_num;
30710
30711 /* For 64-bit we shouldn't get here. */
30712 gcc_assert (!TARGET_64BIT);
30713
30714 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
30715 symb = targetm.strip_name_encoding (symb);
30716
30717 length = strlen (stub);
30718 binder_name = XALLOCAVEC (char, length + 32);
30719 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
30720
30721 length = strlen (symb);
30722 symbol_name = XALLOCAVEC (char, length + 32);
30723 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
30724
30725 sprintf (lazy_ptr_name, "L%d$lz", label);
30726
30727 if (MACHOPIC_ATT_STUB)
30728 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
30729 else if (MACHOPIC_PURE)
30730 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
30731 else
30732 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
30733
30734 fprintf (file, "%s:\n", stub);
30735 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
30736
30737 if (MACHOPIC_ATT_STUB)
30738 {
30739 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
30740 }
30741 else if (MACHOPIC_PURE)
30742 {
30743 /* PIC stub. */
30744 /* 25-byte PIC stub using "CALL get_pc_thunk". */
30745 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
30746 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
30747 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
30748 label, lazy_ptr_name, label);
30749 fprintf (file, "\tjmp\t*%%ecx\n");
30750 }
30751 else
30752 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
30753
30754 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
30755 it needs no stub-binding-helper. */
30756 if (MACHOPIC_ATT_STUB)
30757 return;
30758
30759 fprintf (file, "%s:\n", binder_name);
30760
30761 if (MACHOPIC_PURE)
30762 {
30763 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
30764 fprintf (file, "\tpushl\t%%ecx\n");
30765 }
30766 else
30767 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
30768
30769 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
30770
30771 /* N.B. Keep the correspondence of these
30772 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
30773 old-pic/new-pic/non-pic stubs; altering this will break
30774 compatibility with existing dylibs. */
30775 if (MACHOPIC_PURE)
30776 {
30777 /* 25-byte PIC stub using "CALL get_pc_thunk". */
30778 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
30779 }
30780 else
30781 /* 16-byte -mdynamic-no-pic stub. */
30782 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
30783
30784 fprintf (file, "%s:\n", lazy_ptr_name);
30785 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
30786 fprintf (file, ASM_LONG "%s\n", binder_name);
30787 }
30788 #endif /* TARGET_MACHO */
30789
30790 /* Order the registers for register allocator. */
30791
30792 void
30793 x86_order_regs_for_local_alloc (void)
30794 {
30795 int pos = 0;
30796 int i;
30797
30798 /* First allocate the local general purpose registers. */
30799 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
30800 if (GENERAL_REGNO_P (i) && call_used_regs[i])
30801 reg_alloc_order [pos++] = i;
30802
30803 /* Global general purpose registers. */
30804 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
30805 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
30806 reg_alloc_order [pos++] = i;
30807
30808 /* x87 registers come first in case we are doing FP math
30809 using them. */
30810 if (!TARGET_SSE_MATH)
30811 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
30812 reg_alloc_order [pos++] = i;
30813
30814 /* SSE registers. */
30815 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
30816 reg_alloc_order [pos++] = i;
30817 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
30818 reg_alloc_order [pos++] = i;
30819
30820 /* x87 registers. */
30821 if (TARGET_SSE_MATH)
30822 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
30823 reg_alloc_order [pos++] = i;
30824
30825 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
30826 reg_alloc_order [pos++] = i;
30827
30828 /* Initialize the rest of the array, as we do not allocate some registers
30829 at all. */
30830 while (pos < FIRST_PSEUDO_REGISTER)
30831 reg_alloc_order [pos++] = 0;
30832 }
30833
30834 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
30835 in struct attribute_spec handler. */
30836 static tree
30837 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
30838 tree args,
30839 int flags ATTRIBUTE_UNUSED,
30840 bool *no_add_attrs)
30841 {
30842 if (TREE_CODE (*node) != FUNCTION_TYPE
30843 && TREE_CODE (*node) != METHOD_TYPE
30844 && TREE_CODE (*node) != FIELD_DECL
30845 && TREE_CODE (*node) != TYPE_DECL)
30846 {
30847 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30848 name);
30849 *no_add_attrs = true;
30850 return NULL_TREE;
30851 }
30852 if (TARGET_64BIT)
30853 {
30854 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
30855 name);
30856 *no_add_attrs = true;
30857 return NULL_TREE;
30858 }
30859 if (is_attribute_p ("callee_pop_aggregate_return", name))
30860 {
30861 tree cst;
30862
30863 cst = TREE_VALUE (args);
30864 if (TREE_CODE (cst) != INTEGER_CST)
30865 {
30866 warning (OPT_Wattributes,
30867 "%qE attribute requires an integer constant argument",
30868 name);
30869 *no_add_attrs = true;
30870 }
30871 else if (compare_tree_int (cst, 0) != 0
30872 && compare_tree_int (cst, 1) != 0)
30873 {
30874 warning (OPT_Wattributes,
30875 "argument to %qE attribute is neither zero, nor one",
30876 name);
30877 *no_add_attrs = true;
30878 }
30879
30880 return NULL_TREE;
30881 }
30882
30883 return NULL_TREE;
30884 }
30885
30886 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
30887 struct attribute_spec.handler. */
30888 static tree
30889 ix86_handle_abi_attribute (tree *node, tree name,
30890 tree args ATTRIBUTE_UNUSED,
30891 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30892 {
30893 if (TREE_CODE (*node) != FUNCTION_TYPE
30894 && TREE_CODE (*node) != METHOD_TYPE
30895 && TREE_CODE (*node) != FIELD_DECL
30896 && TREE_CODE (*node) != TYPE_DECL)
30897 {
30898 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30899 name);
30900 *no_add_attrs = true;
30901 return NULL_TREE;
30902 }
30903
30904 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
30905 if (is_attribute_p ("ms_abi", name))
30906 {
30907 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
30908 {
30909 error ("ms_abi and sysv_abi attributes are not compatible");
30910 }
30911
30912 return NULL_TREE;
30913 }
30914 else if (is_attribute_p ("sysv_abi", name))
30915 {
30916 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
30917 {
30918 error ("ms_abi and sysv_abi attributes are not compatible");
30919 }
30920
30921 return NULL_TREE;
30922 }
30923
30924 return NULL_TREE;
30925 }
30926
30927 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
30928 struct attribute_spec.handler. */
30929 static tree
30930 ix86_handle_struct_attribute (tree *node, tree name,
30931 tree args ATTRIBUTE_UNUSED,
30932 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30933 {
30934 tree *type = NULL;
30935 if (DECL_P (*node))
30936 {
30937 if (TREE_CODE (*node) == TYPE_DECL)
30938 type = &TREE_TYPE (*node);
30939 }
30940 else
30941 type = node;
30942
30943 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
30944 || TREE_CODE (*type) == UNION_TYPE)))
30945 {
30946 warning (OPT_Wattributes, "%qE attribute ignored",
30947 name);
30948 *no_add_attrs = true;
30949 }
30950
30951 else if ((is_attribute_p ("ms_struct", name)
30952 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
30953 || ((is_attribute_p ("gcc_struct", name)
30954 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
30955 {
30956 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
30957 name);
30958 *no_add_attrs = true;
30959 }
30960
30961 return NULL_TREE;
30962 }
30963
30964 static tree
30965 ix86_handle_fndecl_attribute (tree *node, tree name,
30966 tree args ATTRIBUTE_UNUSED,
30967 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30968 {
30969 if (TREE_CODE (*node) != FUNCTION_DECL)
30970 {
30971 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30972 name);
30973 *no_add_attrs = true;
30974 }
30975 return NULL_TREE;
30976 }
30977
30978 static bool
30979 ix86_ms_bitfield_layout_p (const_tree record_type)
30980 {
30981 return ((TARGET_MS_BITFIELD_LAYOUT
30982 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
30983 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
30984 }
30985
30986 /* Returns an expression indicating where the this parameter is
30987 located on entry to the FUNCTION. */
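/* For instance, on 32-bit targets the fastcall convention passes this in
   %ecx (%edx when a hidden aggregate-return pointer occupies %ecx), while
   with the default stack convention it is found at 4(%esp), or 8(%esp)
   when the aggregate-return pointer is pushed first.  */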
30988
30989 static rtx
30990 x86_this_parameter (tree function)
30991 {
30992 tree type = TREE_TYPE (function);
30993 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
30994 int nregs;
30995
30996 if (TARGET_64BIT)
30997 {
30998 const int *parm_regs;
30999
31000 if (ix86_function_type_abi (type) == MS_ABI)
31001 parm_regs = x86_64_ms_abi_int_parameter_registers;
31002 else
31003 parm_regs = x86_64_int_parameter_registers;
31004 return gen_rtx_REG (DImode, parm_regs[aggr]);
31005 }
31006
31007 nregs = ix86_function_regparm (type, function);
31008
31009 if (nregs > 0 && !stdarg_p (type))
31010 {
31011 int regno;
31012 unsigned int ccvt = ix86_get_callcvt (type);
31013
31014 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
31015 regno = aggr ? DX_REG : CX_REG;
31016 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
31017 {
31018 regno = CX_REG;
31019 if (aggr)
31020 return gen_rtx_MEM (SImode,
31021 plus_constant (stack_pointer_rtx, 4));
31022 }
31023 else
31024 {
31025 regno = AX_REG;
31026 if (aggr)
31027 {
31028 regno = DX_REG;
31029 if (nregs == 1)
31030 return gen_rtx_MEM (SImode,
31031 plus_constant (stack_pointer_rtx, 4));
31032 }
31033 }
31034 return gen_rtx_REG (SImode, regno);
31035 }
31036
31037 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
31038 }
31039
31040 /* Determine whether x86_output_mi_thunk can succeed. */
31041
31042 static bool
31043 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
31044 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
31045 HOST_WIDE_INT vcall_offset, const_tree function)
31046 {
31047 /* 64-bit can handle anything. */
31048 if (TARGET_64BIT)
31049 return true;
31050
31051 /* For 32-bit, everything's fine if we have one free register. */
31052 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
31053 return true;
31054
31055 /* Need a free register for vcall_offset. */
31056 if (vcall_offset)
31057 return false;
31058
31059 /* Need a free register for GOT references. */
31060 if (flag_pic && !targetm.binds_local_p (function))
31061 return false;
31062
31063 /* Otherwise ok. */
31064 return true;
31065 }
31066
31067 /* Output the assembler code for a thunk function. THUNK_DECL is the
31068 declaration for the thunk function itself, FUNCTION is the decl for
31069 the target function. DELTA is an immediate constant offset to be
31070 added to THIS. If VCALL_OFFSET is nonzero, the word at
31071 *(*this + vcall_offset) should be added to THIS. */
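/* Roughly, for a 32-bit fastcall/thiscall thunk with a vcall offset the
   emitted code looks like
       addl  $DELTA, %ecx
       movl  (%ecx), %eax
       addl  VCALL_OFFSET(%eax), %ecx
       jmp   FUNCTION
   (register choices vary with the calling convention and with TARGET_64BIT).  */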
31072
31073 static void
31074 x86_output_mi_thunk (FILE *file,
31075 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
31076 HOST_WIDE_INT vcall_offset, tree function)
31077 {
31078 rtx this_param = x86_this_parameter (function);
31079 rtx this_reg, tmp, fnaddr;
31080
31081 emit_note (NOTE_INSN_PROLOGUE_END);
31082
31083 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
31084 pull it in now and let DELTA benefit. */
31085 if (REG_P (this_param))
31086 this_reg = this_param;
31087 else if (vcall_offset)
31088 {
31089 /* Put the this parameter into %eax. */
31090 this_reg = gen_rtx_REG (Pmode, AX_REG);
31091 emit_move_insn (this_reg, this_param);
31092 }
31093 else
31094 this_reg = NULL_RTX;
31095
31096 /* Adjust the this parameter by a fixed constant. */
31097 if (delta)
31098 {
31099 rtx delta_rtx = GEN_INT (delta);
31100 rtx delta_dst = this_reg ? this_reg : this_param;
31101
31102 if (TARGET_64BIT)
31103 {
31104 if (!x86_64_general_operand (delta_rtx, Pmode))
31105 {
31106 tmp = gen_rtx_REG (Pmode, R10_REG);
31107 emit_move_insn (tmp, delta_rtx);
31108 delta_rtx = tmp;
31109 }
31110 }
31111
31112 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
31113 }
31114
31115 /* Adjust the this parameter by a value stored in the vtable. */
31116 if (vcall_offset)
31117 {
31118 rtx vcall_addr, vcall_mem, this_mem;
31119 unsigned int tmp_regno;
31120
31121 if (TARGET_64BIT)
31122 tmp_regno = R10_REG;
31123 else
31124 {
31125 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
31126 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
31127 tmp_regno = AX_REG;
31128 else
31129 tmp_regno = CX_REG;
31130 }
31131 tmp = gen_rtx_REG (Pmode, tmp_regno);
31132
31133 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
31134 if (Pmode != ptr_mode)
31135 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
31136 emit_move_insn (tmp, this_mem);
31137
31138 /* Adjust the this parameter. */
31139 vcall_addr = plus_constant (tmp, vcall_offset);
31140 if (TARGET_64BIT
31141 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
31142 {
31143 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
31144 emit_move_insn (tmp2, GEN_INT (vcall_offset));
31145 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
31146 }
31147
31148 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
31149 if (Pmode != ptr_mode)
31150 emit_insn (gen_addsi_1_zext (this_reg,
31151 gen_rtx_REG (ptr_mode,
31152 REGNO (this_reg)),
31153 vcall_mem));
31154 else
31155 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
31156 }
31157
31158 /* If necessary, drop THIS back to its stack slot. */
31159 if (this_reg && this_reg != this_param)
31160 emit_move_insn (this_param, this_reg);
31161
31162 fnaddr = XEXP (DECL_RTL (function), 0);
31163 if (TARGET_64BIT)
31164 {
31165 if (!flag_pic || targetm.binds_local_p (function)
31166 || cfun->machine->call_abi == MS_ABI)
31167 ;
31168 else
31169 {
31170 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
31171 tmp = gen_rtx_CONST (Pmode, tmp);
31172 fnaddr = gen_rtx_MEM (Pmode, tmp);
31173 }
31174 }
31175 else
31176 {
31177 if (!flag_pic || targetm.binds_local_p (function))
31178 ;
31179 #if TARGET_MACHO
31180 else if (TARGET_MACHO)
31181 {
31182 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
31183 fnaddr = XEXP (fnaddr, 0);
31184 }
31185 #endif /* TARGET_MACHO */
31186 else
31187 {
31188 tmp = gen_rtx_REG (Pmode, CX_REG);
31189 output_set_got (tmp, NULL_RTX);
31190
31191 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
31192 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
31193 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
31194 }
31195 }
31196
31197 /* Our sibling call patterns do not allow memories, because we have no
31198 predicate that can distinguish between frame and non-frame memory.
31199 For our purposes here, we can get away with (ab)using a jump pattern,
31200 because we're going to do no optimization. */
31201 if (MEM_P (fnaddr))
31202 emit_jump_insn (gen_indirect_jump (fnaddr));
31203 else
31204 {
31205 tmp = gen_rtx_MEM (QImode, fnaddr);
31206 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
31207 tmp = emit_call_insn (tmp);
31208 SIBLING_CALL_P (tmp) = 1;
31209 }
31210 emit_barrier ();
31211
31212 /* Emit just enough of rest_of_compilation to get the insns emitted.
31213 Note that use_thunk calls assemble_start_function et al. */
31214 tmp = get_insns ();
31215 insn_locators_alloc ();
31216 shorten_branches (tmp);
31217 final_start_function (tmp, file, 1);
31218 final (tmp, file, 1);
31219 final_end_function ();
31220 }
31221
31222 static void
31223 x86_file_start (void)
31224 {
31225 default_file_start ();
31226 #if TARGET_MACHO
31227 darwin_file_start ();
31228 #endif
31229 if (X86_FILE_START_VERSION_DIRECTIVE)
31230 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
31231 if (X86_FILE_START_FLTUSED)
31232 fputs ("\t.global\t__fltused\n", asm_out_file);
31233 if (ix86_asm_dialect == ASM_INTEL)
31234 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
31235 }
31236
31237 int
31238 x86_field_alignment (tree field, int computed)
31239 {
31240 enum machine_mode mode;
31241 tree type = TREE_TYPE (field);
31242
31243 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
31244 return computed;
31245 mode = TYPE_MODE (strip_array_types (type));
31246 if (mode == DFmode || mode == DCmode
31247 || GET_MODE_CLASS (mode) == MODE_INT
31248 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
31249 return MIN (32, computed);
31250 return computed;
31251 }
31252
31253 /* Output assembler code to FILE to increment profiler label # LABELNO
31254 for profiling a function entry. */
31255 void
31256 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
31257 {
31258 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
31259 : MCOUNT_NAME);
31260
31261 if (TARGET_64BIT)
31262 {
31263 #ifndef NO_PROFILE_COUNTERS
31264 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
31265 #endif
31266
31267 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
31268 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
31269 else
31270 fprintf (file, "\tcall\t%s\n", mcount_name);
31271 }
31272 else if (flag_pic)
31273 {
31274 #ifndef NO_PROFILE_COUNTERS
31275 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
31276 LPREFIX, labelno);
31277 #endif
31278 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
31279 }
31280 else
31281 {
31282 #ifndef NO_PROFILE_COUNTERS
31283 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
31284 LPREFIX, labelno);
31285 #endif
31286 fprintf (file, "\tcall\t%s\n", mcount_name);
31287 }
31288 }
31289
31290 /* We don't have exact information about the insn sizes, but we may assume
31291 quite safely that we are informed about all 1-byte insns and memory
31292 address sizes. This is enough to eliminate unnecessary padding in
31293 99% of cases. */
31294
31295 static int
31296 min_insn_size (rtx insn)
31297 {
31298 int l = 0, len;
31299
31300 if (!INSN_P (insn) || !active_insn_p (insn))
31301 return 0;
31302
31303 /* Discard alignments we've emitted, and jump table data. */
31304 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
31305 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
31306 return 0;
31307 if (JUMP_TABLE_DATA_P (insn))
31308 return 0;
31309
31310 /* Important case - calls are always 5 bytes.
31311 It is common to have many calls in a row. */
31312 if (CALL_P (insn)
31313 && symbolic_reference_mentioned_p (PATTERN (insn))
31314 && !SIBLING_CALL_P (insn))
31315 return 5;
31316 len = get_attr_length (insn);
31317 if (len <= 1)
31318 return 1;
31319
31320 /* For normal instructions we rely on get_attr_length being exact,
31321 with a few exceptions. */
31322 if (!JUMP_P (insn))
31323 {
31324 enum attr_type type = get_attr_type (insn);
31325
31326 switch (type)
31327 {
31328 case TYPE_MULTI:
31329 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
31330 || asm_noperands (PATTERN (insn)) >= 0)
31331 return 0;
31332 break;
31333 case TYPE_OTHER:
31334 case TYPE_FCMP:
31335 break;
31336 default:
31337 /* Otherwise trust get_attr_length. */
31338 return len;
31339 }
31340
31341 l = get_attr_length_address (insn);
31342 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
31343 l = 4;
31344 }
31345 if (l)
31346 return 1+l;
31347 else
31348 return 2;
31349 }
31350
31351 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
31352
31353 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
31354 16-byte window. */
31355
31356 static void
31357 ix86_avoid_jump_mispredicts (void)
31358 {
31359 rtx insn, start = get_insns ();
31360 int nbytes = 0, njumps = 0;
31361 int isjump = 0;
31362
31363 /* Look for all minimal intervals of instructions containing 4 jumps.
31364 The intervals are bounded by START and INSN. NBYTES is the total
31365 size of the instructions in the interval, including INSN but not
31366 including START. When NBYTES is smaller than 16 bytes, it is
31367 possible that the ends of START and INSN fall into the same
31368 16-byte window.
31369 The smallest offset at which INSN can start in that window is when
31370 START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
31371 We then emit a p2align to the 16-byte window with maxskip
31372 15 - NBYTES + sizeof (INSN). */
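  /* Illustrative example (not from the original source): if the current
     interval holds insns of sizes 2 + 5 + 2 + 2 = 11 bytes and ends in a
     fourth jump, then NBYTES = 11 < 16, so START and INSN could share a
     16-byte window; the loop below pads INSN by 15 - 11 + 2 = 6 bytes so
     that it starts in the next window.  */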
31373 for (insn = start; insn; insn = NEXT_INSN (insn))
31374 {
31375 int min_size;
31376
31377 if (LABEL_P (insn))
31378 {
31379 int align = label_to_alignment (insn);
31380 int max_skip = label_to_max_skip (insn);
31381
31382 if (max_skip > 15)
31383 max_skip = 15;
31384 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
31385 already in the current 16 byte page, because otherwise
31386 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
31387 bytes to reach 16 byte boundary. */
31388 if (align <= 0
31389 || (align <= 3 && max_skip != (1 << align) - 1))
31390 max_skip = 0;
31391 if (dump_file)
31392 fprintf (dump_file, "Label %i with max_skip %i\n",
31393 INSN_UID (insn), max_skip);
31394 if (max_skip)
31395 {
31396 while (nbytes + max_skip >= 16)
31397 {
31398 start = NEXT_INSN (start);
31399 if ((JUMP_P (start)
31400 && GET_CODE (PATTERN (start)) != ADDR_VEC
31401 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
31402 || CALL_P (start))
31403 njumps--, isjump = 1;
31404 else
31405 isjump = 0;
31406 nbytes -= min_insn_size (start);
31407 }
31408 }
31409 continue;
31410 }
31411
31412 min_size = min_insn_size (insn);
31413 nbytes += min_size;
31414 if (dump_file)
31415 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
31416 INSN_UID (insn), min_size);
31417 if ((JUMP_P (insn)
31418 && GET_CODE (PATTERN (insn)) != ADDR_VEC
31419 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
31420 || CALL_P (insn))
31421 njumps++;
31422 else
31423 continue;
31424
31425 while (njumps > 3)
31426 {
31427 start = NEXT_INSN (start);
31428 if ((JUMP_P (start)
31429 && GET_CODE (PATTERN (start)) != ADDR_VEC
31430 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
31431 || CALL_P (start))
31432 njumps--, isjump = 1;
31433 else
31434 isjump = 0;
31435 nbytes -= min_insn_size (start);
31436 }
31437 gcc_assert (njumps >= 0);
31438 if (dump_file)
31439 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
31440 INSN_UID (start), INSN_UID (insn), nbytes);
31441
31442 if (njumps == 3 && isjump && nbytes < 16)
31443 {
31444 int padsize = 15 - nbytes + min_insn_size (insn);
31445
31446 if (dump_file)
31447 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
31448 INSN_UID (insn), padsize);
31449 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
31450 }
31451 }
31452 }
31453 #endif
31454
31455 /* The AMD Athlon works faster
31456 when a RET is not the destination of a conditional jump or directly
31457 preceded by another jump instruction. We avoid the penalty by inserting
31458 a NOP just before the RET instruction in such cases. */
31459 static void
31460 ix86_pad_returns (void)
31461 {
31462 edge e;
31463 edge_iterator ei;
31464
31465 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
31466 {
31467 basic_block bb = e->src;
31468 rtx ret = BB_END (bb);
31469 rtx prev;
31470 bool replace = false;
31471
31472 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
31473 || optimize_bb_for_size_p (bb))
31474 continue;
31475 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
31476 if (active_insn_p (prev) || LABEL_P (prev))
31477 break;
31478 if (prev && LABEL_P (prev))
31479 {
31480 edge e;
31481 edge_iterator ei;
31482
31483 FOR_EACH_EDGE (e, ei, bb->preds)
31484 if (EDGE_FREQUENCY (e) && e->src->index >= 0
31485 && !(e->flags & EDGE_FALLTHRU))
31486 replace = true;
31487 }
31488 if (!replace)
31489 {
31490 prev = prev_active_insn (ret);
31491 if (prev
31492 && ((JUMP_P (prev) && any_condjump_p (prev))
31493 || CALL_P (prev)))
31494 replace = true;
31495 /* Empty functions get branch mispredicts even when
31496 the jump destination is not visible to us. */
31497 if (!prev && !optimize_function_for_size_p (cfun))
31498 replace = true;
31499 }
31500 if (replace)
31501 {
31502 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
31503 delete_insn (ret);
31504 }
31505 }
31506 }
31507
31508 /* Count the minimum number of instructions in BB. Return 4 if the
31509 number of instructions >= 4. */
31510
31511 static int
31512 ix86_count_insn_bb (basic_block bb)
31513 {
31514 rtx insn;
31515 int insn_count = 0;
31516
31517 /* Count number of instructions in this block. Return 4 if the number
31518 of instructions >= 4. */
31519 FOR_BB_INSNS (bb, insn)
31520 {
31521 /* This can only happen in exit blocks. */
31522 if (JUMP_P (insn)
31523 && ANY_RETURN_P (PATTERN (insn)))
31524 break;
31525
31526 if (NONDEBUG_INSN_P (insn)
31527 && GET_CODE (PATTERN (insn)) != USE
31528 && GET_CODE (PATTERN (insn)) != CLOBBER)
31529 {
31530 insn_count++;
31531 if (insn_count >= 4)
31532 return insn_count;
31533 }
31534 }
31535
31536 return insn_count;
31537 }
31538
31539
31540 /* Count the minimum number of instructions in the code path ending in BB.
31541 Return 4 if the number of instructions >= 4. */
31542
31543 static int
31544 ix86_count_insn (basic_block bb)
31545 {
31546 edge e;
31547 edge_iterator ei;
31548 int min_prev_count;
31549
31550 /* Only bother counting instructions along paths with no
31551 more than 2 basic blocks between entry and exit. Given
31552 that BB has an edge to exit, determine if a predecessor
31553 of BB has an edge from entry. If so, compute the number
31554 of instructions in the predecessor block. If there
31555 happen to be multiple such blocks, compute the minimum. */
31556 min_prev_count = 4;
31557 FOR_EACH_EDGE (e, ei, bb->preds)
31558 {
31559 edge prev_e;
31560 edge_iterator prev_ei;
31561
31562 if (e->src == ENTRY_BLOCK_PTR)
31563 {
31564 min_prev_count = 0;
31565 break;
31566 }
31567 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
31568 {
31569 if (prev_e->src == ENTRY_BLOCK_PTR)
31570 {
31571 int count = ix86_count_insn_bb (e->src);
31572 if (count < min_prev_count)
31573 min_prev_count = count;
31574 break;
31575 }
31576 }
31577 }
31578
31579 if (min_prev_count < 4)
31580 min_prev_count += ix86_count_insn_bb (bb);
31581
31582 return min_prev_count;
31583 }
31584
31585 /* Pad a short function to 4 instructions. */
31586
31587 static void
31588 ix86_pad_short_function (void)
31589 {
31590 edge e;
31591 edge_iterator ei;
31592
31593 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
31594 {
31595 rtx ret = BB_END (e->src);
31596 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
31597 {
31598 int insn_count = ix86_count_insn (e->src);
31599
31600 /* Pad short function. */
31601 if (insn_count < 4)
31602 {
31603 rtx insn = ret;
31604
31605 /* Find epilogue. */
31606 while (insn
31607 && (!NOTE_P (insn)
31608 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
31609 insn = PREV_INSN (insn);
31610
31611 if (!insn)
31612 insn = ret;
31613
31614 /* Two NOPs count as one instruction. */
31615 insn_count = 2 * (4 - insn_count);
31616 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
31617 }
31618 }
31619 }
31620 }
31621
31622 /* Implement machine specific optimizations. We implement padding of returns
31623 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
31624 static void
31625 ix86_reorg (void)
31626 {
31627 /* We are freeing block_for_insn in the toplev to keep compatibility
31628 with old MDEP_REORGS that are not CFG based. Recompute it now. */
31629 compute_bb_for_insn ();
31630
31631 /* Run the vzeroupper optimization if needed. */
31632 if (TARGET_VZEROUPPER)
31633 move_or_delete_vzeroupper ();
31634
31635 if (optimize && optimize_function_for_speed_p (cfun))
31636 {
31637 if (TARGET_PAD_SHORT_FUNCTION)
31638 ix86_pad_short_function ();
31639 else if (TARGET_PAD_RETURNS)
31640 ix86_pad_returns ();
31641 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
31642 if (TARGET_FOUR_JUMP_LIMIT)
31643 ix86_avoid_jump_mispredicts ();
31644 #endif
31645 }
31646 }
31647
31648 /* Return nonzero when a QImode register that must be represented via a REX
31649 prefix is used. */
31650 bool
31651 x86_extended_QIreg_mentioned_p (rtx insn)
31652 {
31653 int i;
31654 extract_insn_cached (insn);
31655 for (i = 0; i < recog_data.n_operands; i++)
31656 if (REG_P (recog_data.operand[i])
31657 && REGNO (recog_data.operand[i]) > BX_REG)
31658 return true;
31659 return false;
31660 }
31661
31662 /* Return nonzero when P points to a register encoded via a REX prefix.
31663 Called via for_each_rtx. */
31664 static int
31665 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
31666 {
31667 unsigned int regno;
31668 if (!REG_P (*p))
31669 return 0;
31670 regno = REGNO (*p);
31671 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
31672 }
31673
31674 /* Return true when INSN mentions a register that must be encoded using a
31675 REX prefix. */
31676 bool
31677 x86_extended_reg_mentioned_p (rtx insn)
31678 {
31679 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
31680 extended_reg_mentioned_1, NULL);
31681 }
31682
31683 /* If profitable, negate (without causing overflow) integer constant
31684 of mode MODE at location LOC. Return true in this case. */
31685 bool
31686 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
31687 {
31688 HOST_WIDE_INT val;
31689
31690 if (!CONST_INT_P (*loc))
31691 return false;
31692
31693 switch (mode)
31694 {
31695 case DImode:
31696 /* DImode x86_64 constants must fit in 32 bits. */
31697 gcc_assert (x86_64_immediate_operand (*loc, mode));
31698
31699 mode = SImode;
31700 break;
31701
31702 case SImode:
31703 case HImode:
31704 case QImode:
31705 break;
31706
31707 default:
31708 gcc_unreachable ();
31709 }
31710
31711 /* Avoid overflows. */
31712 if (mode_signbit_p (mode, *loc))
31713 return false;
31714
31715 val = INTVAL (*loc);
31716
31717 /* Make things pretty: use `subl $4,%eax' rather than `addl $-4,%eax'.
31718 Exception: -128 encodes smaller than 128, so swap the sign and the operation. */
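  /* For example, `addl $128,%eax' needs a 32-bit immediate, while the
     equivalent `subl $-128,%eax' fits in a sign-extended 8-bit immediate,
     since imm8 covers -128..127.  */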
31719 if ((val < 0 && val != -128)
31720 || val == 128)
31721 {
31722 *loc = GEN_INT (-val);
31723 return true;
31724 }
31725
31726 return false;
31727 }
31728
31729 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
31730 optabs would emit if we didn't have TFmode patterns. */
31731
31732 void
31733 x86_emit_floatuns (rtx operands[2])
31734 {
31735 rtx neglab, donelab, i0, i1, f0, in, out;
31736 enum machine_mode mode, inmode;
31737
31738 inmode = GET_MODE (operands[1]);
31739 gcc_assert (inmode == SImode || inmode == DImode);
31740
31741 out = operands[0];
31742 in = force_reg (inmode, operands[1]);
31743 mode = GET_MODE (out);
31744 neglab = gen_label_rtx ();
31745 donelab = gen_label_rtx ();
31746 f0 = gen_reg_rtx (mode);
31747
31748 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
31749
31750 expand_float (out, in, 0);
31751
31752 emit_jump_insn (gen_jump (donelab));
31753 emit_barrier ();
31754
31755 emit_label (neglab);
31756
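  /* Here the input has its sign bit set, so a signed conversion would be
     wrong.  Halve the value while folding the discarded low bit back in
     (so the final doubling rounds the same way), convert, then double.  */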
31757 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
31758 1, OPTAB_DIRECT);
31759 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
31760 1, OPTAB_DIRECT);
31761 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
31762
31763 expand_float (f0, i0, 0);
31764
31765 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
31766
31767 emit_label (donelab);
31768 }
31769 \f
31770 /* AVX does not support 32-byte integer vector operations,
31771 thus the longest vector we are faced with is V16QImode. */
31772 #define MAX_VECT_LEN 16
31773
31774 struct expand_vec_perm_d
31775 {
31776 rtx target, op0, op1;
31777 unsigned char perm[MAX_VECT_LEN];
31778 enum machine_mode vmode;
31779 unsigned char nelt;
31780 bool testing_p;
31781 };
31782
31783 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
31784 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
31785 static int extract_vec_perm_cst (struct expand_vec_perm_d *, tree);
31786 static bool ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask);
31787
31788
31789 /* Get a vector mode of the same size as the original but with elements
31790 twice as wide. This is only guaranteed to apply to integral vectors. */
31791
31792 static inline enum machine_mode
31793 get_mode_wider_vector (enum machine_mode o)
31794 {
31795 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
31796 enum machine_mode n = GET_MODE_WIDER_MODE (o);
31797 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
31798 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
31799 return n;
31800 }
31801
31802 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
31803 with all elements equal to VAR. Return true if successful. */
31804
31805 static bool
31806 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
31807 rtx target, rtx val)
31808 {
31809 bool ok;
31810
31811 switch (mode)
31812 {
31813 case V2SImode:
31814 case V2SFmode:
31815 if (!mmx_ok)
31816 return false;
31817 /* FALLTHRU */
31818
31819 case V4DFmode:
31820 case V4DImode:
31821 case V8SFmode:
31822 case V8SImode:
31823 case V2DFmode:
31824 case V2DImode:
31825 case V4SFmode:
31826 case V4SImode:
31827 {
31828 rtx insn, dup;
31829
31830 /* First attempt to recognize VAL as-is. */
31831 dup = gen_rtx_VEC_DUPLICATE (mode, val);
31832 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
31833 if (recog_memoized (insn) < 0)
31834 {
31835 rtx seq;
31836 /* If that fails, force VAL into a register. */
31837
31838 start_sequence ();
31839 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
31840 seq = get_insns ();
31841 end_sequence ();
31842 if (seq)
31843 emit_insn_before (seq, insn);
31844
31845 ok = recog_memoized (insn) >= 0;
31846 gcc_assert (ok);
31847 }
31848 }
31849 return true;
31850
31851 case V4HImode:
31852 if (!mmx_ok)
31853 return false;
31854 if (TARGET_SSE || TARGET_3DNOW_A)
31855 {
31856 rtx x;
31857
31858 val = gen_lowpart (SImode, val);
31859 x = gen_rtx_TRUNCATE (HImode, val);
31860 x = gen_rtx_VEC_DUPLICATE (mode, x);
31861 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31862 return true;
31863 }
31864 goto widen;
31865
31866 case V8QImode:
31867 if (!mmx_ok)
31868 return false;
31869 goto widen;
31870
31871 case V8HImode:
31872 if (TARGET_SSE2)
31873 {
31874 struct expand_vec_perm_d dperm;
31875 rtx tmp1, tmp2;
31876
31877 permute:
31878 memset (&dperm, 0, sizeof (dperm));
31879 dperm.target = target;
31880 dperm.vmode = mode;
31881 dperm.nelt = GET_MODE_NUNITS (mode);
31882 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
31883
31884 /* Extend to SImode using a paradoxical SUBREG. */
31885 tmp1 = gen_reg_rtx (SImode);
31886 emit_move_insn (tmp1, gen_lowpart (SImode, val));
31887
31888 /* Insert the SImode value as low element of a V4SImode vector. */
31889 tmp2 = gen_lowpart (V4SImode, dperm.op0);
31890 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
31891
31892 ok = (expand_vec_perm_1 (&dperm)
31893 || expand_vec_perm_broadcast_1 (&dperm));
31894 gcc_assert (ok);
31895 return ok;
31896 }
31897 goto widen;
31898
31899 case V16QImode:
31900 if (TARGET_SSE2)
31901 goto permute;
31902 goto widen;
31903
31904 widen:
31905 /* Replicate the value once into the next wider mode and recurse. */
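      /* E.g. a V8QImode broadcast of V becomes a V4HImode broadcast of
	 (V << 8) | V.  */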
31906 {
31907 enum machine_mode smode, wsmode, wvmode;
31908 rtx x;
31909
31910 smode = GET_MODE_INNER (mode);
31911 wvmode = get_mode_wider_vector (mode);
31912 wsmode = GET_MODE_INNER (wvmode);
31913
31914 val = convert_modes (wsmode, smode, val, true);
31915 x = expand_simple_binop (wsmode, ASHIFT, val,
31916 GEN_INT (GET_MODE_BITSIZE (smode)),
31917 NULL_RTX, 1, OPTAB_LIB_WIDEN);
31918 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
31919
31920 x = gen_lowpart (wvmode, target);
31921 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
31922 gcc_assert (ok);
31923 return ok;
31924 }
31925
31926 case V16HImode:
31927 case V32QImode:
31928 {
31929 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
31930 rtx x = gen_reg_rtx (hvmode);
31931
31932 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
31933 gcc_assert (ok);
31934
31935 x = gen_rtx_VEC_CONCAT (mode, x, x);
31936 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31937 }
31938 return true;
31939
31940 default:
31941 return false;
31942 }
31943 }
31944
31945 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
31946 whose ONE_VAR element is VAR, and other elements are zero. Return true
31947 if successful. */
31948
31949 static bool
31950 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
31951 rtx target, rtx var, int one_var)
31952 {
31953 enum machine_mode vsimode;
31954 rtx new_target;
31955 rtx x, tmp;
31956 bool use_vector_set = false;
31957
31958 switch (mode)
31959 {
31960 case V2DImode:
31961 /* For SSE4.1, we normally use vector set. But if the second
31962 element is zero and inter-unit moves are OK, we use movq
31963 instead. */
31964 use_vector_set = (TARGET_64BIT
31965 && TARGET_SSE4_1
31966 && !(TARGET_INTER_UNIT_MOVES
31967 && one_var == 0));
31968 break;
31969 case V16QImode:
31970 case V4SImode:
31971 case V4SFmode:
31972 use_vector_set = TARGET_SSE4_1;
31973 break;
31974 case V8HImode:
31975 use_vector_set = TARGET_SSE2;
31976 break;
31977 case V4HImode:
31978 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
31979 break;
31980 case V32QImode:
31981 case V16HImode:
31982 case V8SImode:
31983 case V8SFmode:
31984 case V4DFmode:
31985 use_vector_set = TARGET_AVX;
31986 break;
31987 case V4DImode:
31988 /* Use ix86_expand_vector_set in 64bit mode only. */
31989 use_vector_set = TARGET_AVX && TARGET_64BIT;
31990 break;
31991 default:
31992 break;
31993 }
31994
31995 if (use_vector_set)
31996 {
31997 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
31998 var = force_reg (GET_MODE_INNER (mode), var);
31999 ix86_expand_vector_set (mmx_ok, target, var, one_var);
32000 return true;
32001 }
32002
32003 switch (mode)
32004 {
32005 case V2SFmode:
32006 case V2SImode:
32007 if (!mmx_ok)
32008 return false;
32009 /* FALLTHRU */
32010
32011 case V2DFmode:
32012 case V2DImode:
32013 if (one_var != 0)
32014 return false;
32015 var = force_reg (GET_MODE_INNER (mode), var);
32016 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
32017 emit_insn (gen_rtx_SET (VOIDmode, target, x));
32018 return true;
32019
32020 case V4SFmode:
32021 case V4SImode:
32022 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
32023 new_target = gen_reg_rtx (mode);
32024 else
32025 new_target = target;
32026 var = force_reg (GET_MODE_INNER (mode), var);
32027 x = gen_rtx_VEC_DUPLICATE (mode, var);
32028 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
32029 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
32030 if (one_var != 0)
32031 {
32032 /* We need to shuffle the value to the correct position, so
32033 create a new pseudo to store the intermediate result. */
32034
32035 /* With SSE2, we can use the integer shuffle insns. */
32036 if (mode != V4SFmode && TARGET_SSE2)
32037 {
32038 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
32039 const1_rtx,
32040 GEN_INT (one_var == 1 ? 0 : 1),
32041 GEN_INT (one_var == 2 ? 0 : 1),
32042 GEN_INT (one_var == 3 ? 0 : 1)));
32043 if (target != new_target)
32044 emit_move_insn (target, new_target);
32045 return true;
32046 }
32047
32048 /* Otherwise convert the intermediate result to V4SFmode and
32049 use the SSE1 shuffle instructions. */
32050 if (mode != V4SFmode)
32051 {
32052 tmp = gen_reg_rtx (V4SFmode);
32053 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
32054 }
32055 else
32056 tmp = new_target;
32057
32058 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
32059 const1_rtx,
32060 GEN_INT (one_var == 1 ? 0 : 1),
32061 GEN_INT (one_var == 2 ? 0+4 : 1+4),
32062 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
32063
32064 if (mode != V4SFmode)
32065 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
32066 else if (tmp != target)
32067 emit_move_insn (target, tmp);
32068 }
32069 else if (target != new_target)
32070 emit_move_insn (target, new_target);
32071 return true;
32072
32073 case V8HImode:
32074 case V16QImode:
32075 vsimode = V4SImode;
32076 goto widen;
32077 case V4HImode:
32078 case V8QImode:
32079 if (!mmx_ok)
32080 return false;
32081 vsimode = V2SImode;
32082 goto widen;
32083 widen:
32084 if (one_var != 0)
32085 return false;
32086
32087 /* Zero extend the variable element to SImode and recurse. */
32088 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
32089
32090 x = gen_reg_rtx (vsimode);
32091 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
32092 var, one_var))
32093 gcc_unreachable ();
32094
32095 emit_move_insn (target, gen_lowpart (mode, x));
32096 return true;
32097
32098 default:
32099 return false;
32100 }
32101 }
32102
32103 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
32104 consisting of the values in VALS. It is known that all elements
32105 except ONE_VAR are constants. Return true if successful. */
32106
32107 static bool
32108 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
32109 rtx target, rtx vals, int one_var)
32110 {
32111 rtx var = XVECEXP (vals, 0, one_var);
32112 enum machine_mode wmode;
32113 rtx const_vec, x;
32114
32115 const_vec = copy_rtx (vals);
32116 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
32117 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
32118
32119 switch (mode)
32120 {
32121 case V2DFmode:
32122 case V2DImode:
32123 case V2SFmode:
32124 case V2SImode:
32125 /* For the two element vectors, it's just as easy to use
32126 the general case. */
32127 return false;
32128
32129 case V4DImode:
32130 /* Use ix86_expand_vector_set in 64bit mode only. */
32131 if (!TARGET_64BIT)
32132 return false;
32133 case V4DFmode:
32134 case V8SFmode:
32135 case V8SImode:
32136 case V16HImode:
32137 case V32QImode:
32138 case V4SFmode:
32139 case V4SImode:
32140 case V8HImode:
32141 case V4HImode:
32142 break;
32143
32144 case V16QImode:
32145 if (TARGET_SSE4_1)
32146 break;
32147 wmode = V8HImode;
32148 goto widen;
32149 case V8QImode:
32150 wmode = V4HImode;
32151 goto widen;
32152 widen:
32153 /* There's no way to set one QImode entry easily. Combine
32154 the variable value with its adjacent constant value, and
32155 promote to an HImode set. */
32156 x = XVECEXP (vals, 0, one_var ^ 1);
32157 if (one_var & 1)
32158 {
32159 var = convert_modes (HImode, QImode, var, true);
32160 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
32161 NULL_RTX, 1, OPTAB_LIB_WIDEN);
32162 x = GEN_INT (INTVAL (x) & 0xff);
32163 }
32164 else
32165 {
32166 var = convert_modes (HImode, QImode, var, true);
32167 x = gen_int_mode (INTVAL (x) << 8, HImode);
32168 }
32169 if (x != const0_rtx)
32170 var = expand_simple_binop (HImode, IOR, var, x, var,
32171 1, OPTAB_LIB_WIDEN);
32172
32173 x = gen_reg_rtx (wmode);
32174 emit_move_insn (x, gen_lowpart (wmode, const_vec));
32175 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
32176
32177 emit_move_insn (target, gen_lowpart (mode, x));
32178 return true;
32179
32180 default:
32181 return false;
32182 }
32183
32184 emit_move_insn (target, const_vec);
32185 ix86_expand_vector_set (mmx_ok, target, var, one_var);
32186 return true;
32187 }
32188
32189 /* A subroutine of ix86_expand_vector_init_general. Use vector
32190 concatenate to handle the most general case: all values variable,
32191 and none identical. */
32192
32193 static void
32194 ix86_expand_vector_init_concat (enum machine_mode mode,
32195 rtx target, rtx *ops, int n)
32196 {
32197 enum machine_mode cmode, hmode = VOIDmode;
32198 rtx first[8], second[4];
32199 rtvec v;
32200 int i, j;
32201
32202 switch (n)
32203 {
32204 case 2:
32205 switch (mode)
32206 {
32207 case V8SImode:
32208 cmode = V4SImode;
32209 break;
32210 case V8SFmode:
32211 cmode = V4SFmode;
32212 break;
32213 case V4DImode:
32214 cmode = V2DImode;
32215 break;
32216 case V4DFmode:
32217 cmode = V2DFmode;
32218 break;
32219 case V4SImode:
32220 cmode = V2SImode;
32221 break;
32222 case V4SFmode:
32223 cmode = V2SFmode;
32224 break;
32225 case V2DImode:
32226 cmode = DImode;
32227 break;
32228 case V2SImode:
32229 cmode = SImode;
32230 break;
32231 case V2DFmode:
32232 cmode = DFmode;
32233 break;
32234 case V2SFmode:
32235 cmode = SFmode;
32236 break;
32237 default:
32238 gcc_unreachable ();
32239 }
32240
32241 if (!register_operand (ops[1], cmode))
32242 ops[1] = force_reg (cmode, ops[1]);
32243 if (!register_operand (ops[0], cmode))
32244 ops[0] = force_reg (cmode, ops[0]);
32245 emit_insn (gen_rtx_SET (VOIDmode, target,
32246 gen_rtx_VEC_CONCAT (mode, ops[0],
32247 ops[1])));
32248 break;
32249
32250 case 4:
32251 switch (mode)
32252 {
32253 case V4DImode:
32254 cmode = V2DImode;
32255 break;
32256 case V4DFmode:
32257 cmode = V2DFmode;
32258 break;
32259 case V4SImode:
32260 cmode = V2SImode;
32261 break;
32262 case V4SFmode:
32263 cmode = V2SFmode;
32264 break;
32265 default:
32266 gcc_unreachable ();
32267 }
32268 goto half;
32269
32270 case 8:
32271 switch (mode)
32272 {
32273 case V8SImode:
32274 cmode = V2SImode;
32275 hmode = V4SImode;
32276 break;
32277 case V8SFmode:
32278 cmode = V2SFmode;
32279 hmode = V4SFmode;
32280 break;
32281 default:
32282 gcc_unreachable ();
32283 }
32284 goto half;
32285
32286 half:
32287 /* FIXME: We process inputs backward to help RA. PR 36222. */
32288 i = n - 1;
32289 j = (n >> 1) - 1;
32290 for (; i > 0; i -= 2, j--)
32291 {
32292 first[j] = gen_reg_rtx (cmode);
32293 v = gen_rtvec (2, ops[i - 1], ops[i]);
32294 ix86_expand_vector_init (false, first[j],
32295 gen_rtx_PARALLEL (cmode, v));
32296 }
32297
32298 n >>= 1;
32299 if (n > 2)
32300 {
32301 gcc_assert (hmode != VOIDmode);
32302 for (i = j = 0; i < n; i += 2, j++)
32303 {
32304 second[j] = gen_reg_rtx (hmode);
32305 ix86_expand_vector_init_concat (hmode, second [j],
32306 &first [i], 2);
32307 }
32308 n >>= 1;
32309 ix86_expand_vector_init_concat (mode, target, second, n);
32310 }
32311 else
32312 ix86_expand_vector_init_concat (mode, target, first, n);
32313 break;
32314
32315 default:
32316 gcc_unreachable ();
32317 }
32318 }
32319
32320 /* A subroutine of ix86_expand_vector_init_general. Use vector
32321 interleave to handle the most general case: all values variable,
32322 and none identical. */
32323
32324 static void
32325 ix86_expand_vector_init_interleave (enum machine_mode mode,
32326 rtx target, rtx *ops, int n)
32327 {
32328 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
32329 int i, j;
32330 rtx op0, op1;
32331 rtx (*gen_load_even) (rtx, rtx, rtx);
32332 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
32333 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
32334
32335 switch (mode)
32336 {
32337 case V8HImode:
32338 gen_load_even = gen_vec_setv8hi;
32339 gen_interleave_first_low = gen_vec_interleave_lowv4si;
32340 gen_interleave_second_low = gen_vec_interleave_lowv2di;
32341 inner_mode = HImode;
32342 first_imode = V4SImode;
32343 second_imode = V2DImode;
32344 third_imode = VOIDmode;
32345 break;
32346 case V16QImode:
32347 gen_load_even = gen_vec_setv16qi;
32348 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
32349 gen_interleave_second_low = gen_vec_interleave_lowv4si;
32350 inner_mode = QImode;
32351 first_imode = V8HImode;
32352 second_imode = V4SImode;
32353 third_imode = V2DImode;
32354 break;
32355 default:
32356 gcc_unreachable ();
32357 }
32358
32359 for (i = 0; i < n; i++)
32360 {
32361 /* Extend the odd element to SImode using a paradoxical SUBREG. */
32362 op0 = gen_reg_rtx (SImode);
32363 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
32364
32365 /* Insert the SImode value as low element of V4SImode vector. */
32366 op1 = gen_reg_rtx (V4SImode);
32367 op0 = gen_rtx_VEC_MERGE (V4SImode,
32368 gen_rtx_VEC_DUPLICATE (V4SImode,
32369 op0),
32370 CONST0_RTX (V4SImode),
32371 const1_rtx);
32372 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
32373
32374 /* Cast the V4SImode vector back to a vector of the original mode. */
32375 op0 = gen_reg_rtx (mode);
32376 emit_move_insn (op0, gen_lowpart (mode, op1));
32377
32378 /* Load even elements into the second position. */
32379 emit_insn (gen_load_even (op0,
32380 force_reg (inner_mode,
32381 ops [i + i + 1]),
32382 const1_rtx));
32383
32384 /* Cast vector to FIRST_IMODE vector. */
32385 ops[i] = gen_reg_rtx (first_imode);
32386 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
32387 }
32388
32389 /* Interleave low FIRST_IMODE vectors. */
32390 for (i = j = 0; i < n; i += 2, j++)
32391 {
32392 op0 = gen_reg_rtx (first_imode);
32393 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
32394
32395 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
32396 ops[j] = gen_reg_rtx (second_imode);
32397 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
32398 }
32399
32400 /* Interleave low SECOND_IMODE vectors. */
32401 switch (second_imode)
32402 {
32403 case V4SImode:
32404 for (i = j = 0; i < n / 2; i += 2, j++)
32405 {
32406 op0 = gen_reg_rtx (second_imode);
32407 emit_insn (gen_interleave_second_low (op0, ops[i],
32408 ops[i + 1]));
32409
32410 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
32411 vector. */
32412 ops[j] = gen_reg_rtx (third_imode);
32413 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
32414 }
32415 second_imode = V2DImode;
32416 gen_interleave_second_low = gen_vec_interleave_lowv2di;
32417 /* FALLTHRU */
32418
32419 case V2DImode:
32420 op0 = gen_reg_rtx (second_imode);
32421 emit_insn (gen_interleave_second_low (op0, ops[0],
32422 ops[1]));
32423
32424 /* Cast the SECOND_IMODE vector back to a vector of the original
32425 mode. */
32426 emit_insn (gen_rtx_SET (VOIDmode, target,
32427 gen_lowpart (mode, op0)));
32428 break;
32429
32430 default:
32431 gcc_unreachable ();
32432 }
32433 }
32434
32435 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
32436 all values variable, and none identical. */
32437
32438 static void
32439 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
32440 rtx target, rtx vals)
32441 {
32442 rtx ops[32], op0, op1;
32443 enum machine_mode half_mode = VOIDmode;
32444 int n, i;
32445
32446 switch (mode)
32447 {
32448 case V2SFmode:
32449 case V2SImode:
32450 if (!mmx_ok && !TARGET_SSE)
32451 break;
32452 /* FALLTHRU */
32453
32454 case V8SFmode:
32455 case V8SImode:
32456 case V4DFmode:
32457 case V4DImode:
32458 case V4SFmode:
32459 case V4SImode:
32460 case V2DFmode:
32461 case V2DImode:
32462 n = GET_MODE_NUNITS (mode);
32463 for (i = 0; i < n; i++)
32464 ops[i] = XVECEXP (vals, 0, i);
32465 ix86_expand_vector_init_concat (mode, target, ops, n);
32466 return;
32467
32468 case V32QImode:
32469 half_mode = V16QImode;
32470 goto half;
32471
32472 case V16HImode:
32473 half_mode = V8HImode;
32474 goto half;
32475
32476 half:
32477 n = GET_MODE_NUNITS (mode);
32478 for (i = 0; i < n; i++)
32479 ops[i] = XVECEXP (vals, 0, i);
32480 op0 = gen_reg_rtx (half_mode);
32481 op1 = gen_reg_rtx (half_mode);
32482 ix86_expand_vector_init_interleave (half_mode, op0, ops,
32483 n >> 2);
32484 ix86_expand_vector_init_interleave (half_mode, op1,
32485 &ops [n >> 1], n >> 2);
32486 emit_insn (gen_rtx_SET (VOIDmode, target,
32487 gen_rtx_VEC_CONCAT (mode, op0, op1)));
32488 return;
32489
32490 case V16QImode:
32491 if (!TARGET_SSE4_1)
32492 break;
32493 /* FALLTHRU */
32494
32495 case V8HImode:
32496 if (!TARGET_SSE2)
32497 break;
32498
32499 /* Don't use ix86_expand_vector_init_interleave if we can't
32500 move from GPR to SSE register directly. */
32501 if (!TARGET_INTER_UNIT_MOVES)
32502 break;
32503
32504 n = GET_MODE_NUNITS (mode);
32505 for (i = 0; i < n; i++)
32506 ops[i] = XVECEXP (vals, 0, i);
32507 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
32508 return;
32509
32510 case V4HImode:
32511 case V8QImode:
32512 break;
32513
32514 default:
32515 gcc_unreachable ();
32516 }
32517
32518 {
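    /* Fallback: pack the vector elements into word_mode integers using
       shifts and ORs, then assemble the vector from those words.  */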
32519 int i, j, n_elts, n_words, n_elt_per_word;
32520 enum machine_mode inner_mode;
32521 rtx words[4], shift;
32522
32523 inner_mode = GET_MODE_INNER (mode);
32524 n_elts = GET_MODE_NUNITS (mode);
32525 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
32526 n_elt_per_word = n_elts / n_words;
32527 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
32528
32529 for (i = 0; i < n_words; ++i)
32530 {
32531 rtx word = NULL_RTX;
32532
32533 for (j = 0; j < n_elt_per_word; ++j)
32534 {
32535 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
32536 elt = convert_modes (word_mode, inner_mode, elt, true);
32537
32538 if (j == 0)
32539 word = elt;
32540 else
32541 {
32542 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
32543 word, 1, OPTAB_LIB_WIDEN);
32544 word = expand_simple_binop (word_mode, IOR, word, elt,
32545 word, 1, OPTAB_LIB_WIDEN);
32546 }
32547 }
32548
32549 words[i] = word;
32550 }
32551
32552 if (n_words == 1)
32553 emit_move_insn (target, gen_lowpart (mode, words[0]));
32554 else if (n_words == 2)
32555 {
32556 rtx tmp = gen_reg_rtx (mode);
32557 emit_clobber (tmp);
32558 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
32559 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
32560 emit_move_insn (target, tmp);
32561 }
32562 else if (n_words == 4)
32563 {
32564 rtx tmp = gen_reg_rtx (V4SImode);
32565 gcc_assert (word_mode == SImode);
32566 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
32567 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
32568 emit_move_insn (target, gen_lowpart (mode, tmp));
32569 }
32570 else
32571 gcc_unreachable ();
32572 }
32573 }
32574
32575 /* Initialize vector TARGET via VALS. Suppress the use of MMX
32576 instructions unless MMX_OK is true. */
32577
32578 void
32579 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
32580 {
32581 enum machine_mode mode = GET_MODE (target);
32582 enum machine_mode inner_mode = GET_MODE_INNER (mode);
32583 int n_elts = GET_MODE_NUNITS (mode);
32584 int n_var = 0, one_var = -1;
32585 bool all_same = true, all_const_zero = true;
32586 int i;
32587 rtx x;
32588
32589 for (i = 0; i < n_elts; ++i)
32590 {
32591 x = XVECEXP (vals, 0, i);
32592 if (!(CONST_INT_P (x)
32593 || GET_CODE (x) == CONST_DOUBLE
32594 || GET_CODE (x) == CONST_FIXED))
32595 n_var++, one_var = i;
32596 else if (x != CONST0_RTX (inner_mode))
32597 all_const_zero = false;
32598 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
32599 all_same = false;
32600 }
32601
32602 /* Constants are best loaded from the constant pool. */
32603 if (n_var == 0)
32604 {
32605 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
32606 return;
32607 }
32608
32609 /* If all values are identical, broadcast the value. */
32610 if (all_same
32611 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
32612 XVECEXP (vals, 0, 0)))
32613 return;
32614
32615 /* Values where only one field is non-constant are best loaded from
32616 the pool and overwritten via move later. */
32617 if (n_var == 1)
32618 {
32619 if (all_const_zero
32620 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
32621 XVECEXP (vals, 0, one_var),
32622 one_var))
32623 return;
32624
32625 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
32626 return;
32627 }
32628
32629 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
32630 }
32631
32632 void
32633 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
32634 {
32635 enum machine_mode mode = GET_MODE (target);
32636 enum machine_mode inner_mode = GET_MODE_INNER (mode);
32637 enum machine_mode half_mode;
32638 bool use_vec_merge = false;
32639 rtx tmp;
32640 static rtx (*gen_extract[6][2]) (rtx, rtx)
32641 = {
32642 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
32643 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
32644 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
32645 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
32646 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
32647 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
32648 };
32649 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
32650 = {
32651 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
32652 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
32653 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
32654 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
32655 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
32656 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
32657 };
32658 int i, j, n;
32659
32660 switch (mode)
32661 {
32662 case V2SFmode:
32663 case V2SImode:
32664 if (mmx_ok)
32665 {
32666 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
32667 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
32668 if (elt == 0)
32669 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
32670 else
32671 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
32672 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32673 return;
32674 }
32675 break;
32676
32677 case V2DImode:
32678 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
32679 if (use_vec_merge)
32680 break;
32681
32682 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
32683 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
32684 if (elt == 0)
32685 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
32686 else
32687 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
32688 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32689 return;
32690
32691 case V2DFmode:
32692 {
32693 rtx op0, op1;
32694
32695 /* For the two element vectors, we implement a VEC_CONCAT with
32696 the extraction of the other element. */
32697
32698 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
32699 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
32700
32701 if (elt == 0)
32702 op0 = val, op1 = tmp;
32703 else
32704 op0 = tmp, op1 = val;
32705
32706 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
32707 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32708 }
32709 return;
32710
32711 case V4SFmode:
32712 use_vec_merge = TARGET_SSE4_1;
32713 if (use_vec_merge)
32714 break;
32715
32716 switch (elt)
32717 {
32718 case 0:
32719 use_vec_merge = true;
32720 break;
32721
32722 case 1:
32723 /* tmp = target = A B C D */
32724 tmp = copy_to_reg (target);
32725 /* target = A A B B */
32726 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
32727 /* target = X A B B */
32728 ix86_expand_vector_set (false, target, val, 0);
32729 /* target = A X C D */
32730 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
32731 const1_rtx, const0_rtx,
32732 GEN_INT (2+4), GEN_INT (3+4)));
32733 return;
32734
32735 case 2:
32736 /* tmp = target = A B C D */
32737 tmp = copy_to_reg (target);
32738 /* tmp = X B C D */
32739 ix86_expand_vector_set (false, tmp, val, 0);
32740 /* target = A B X D */
32741 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
32742 const0_rtx, const1_rtx,
32743 GEN_INT (0+4), GEN_INT (3+4)));
32744 return;
32745
32746 case 3:
32747 /* tmp = target = A B C D */
32748 tmp = copy_to_reg (target);
32749 /* tmp = X B C D */
32750 ix86_expand_vector_set (false, tmp, val, 0);
32751 /* target = A B C X */
32752 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
32753 const0_rtx, const1_rtx,
32754 GEN_INT (2+4), GEN_INT (0+4)));
32755 return;
32756
32757 default:
32758 gcc_unreachable ();
32759 }
32760 break;
32761
32762 case V4SImode:
32763 use_vec_merge = TARGET_SSE4_1;
32764 if (use_vec_merge)
32765 break;
32766
32767 /* Element 0 handled by vec_merge below. */
32768 if (elt == 0)
32769 {
32770 use_vec_merge = true;
32771 break;
32772 }
32773
32774 if (TARGET_SSE2)
32775 {
32776 /* With SSE2, use integer shuffles to swap element 0 and ELT,
32777 store into element 0, then shuffle them back. */
32778
32779 rtx order[4];
32780
32781 order[0] = GEN_INT (elt);
32782 order[1] = const1_rtx;
32783 order[2] = const2_rtx;
32784 order[3] = GEN_INT (3);
32785 order[elt] = const0_rtx;
32786
32787 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
32788 order[1], order[2], order[3]));
32789
32790 ix86_expand_vector_set (false, target, val, 0);
32791
32792 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
32793 order[1], order[2], order[3]));
32794 }
32795 else
32796 {
32797 /* For SSE1, we have to reuse the V4SF code. */
32798 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
32799 gen_lowpart (SFmode, val), elt);
32800 }
32801 return;
32802
32803 case V8HImode:
32804 use_vec_merge = TARGET_SSE2;
32805 break;
32806 case V4HImode:
32807 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
32808 break;
32809
32810 case V16QImode:
32811 use_vec_merge = TARGET_SSE4_1;
32812 break;
32813
32814 case V8QImode:
32815 break;
32816
32817 case V32QImode:
32818 half_mode = V16QImode;
32819 j = 0;
32820 n = 16;
32821 goto half;
32822
32823 case V16HImode:
32824 half_mode = V8HImode;
32825 j = 1;
32826 n = 8;
32827 goto half;
32828
32829 case V8SImode:
32830 half_mode = V4SImode;
32831 j = 2;
32832 n = 4;
32833 goto half;
32834
32835 case V4DImode:
32836 half_mode = V2DImode;
32837 j = 3;
32838 n = 2;
32839 goto half;
32840
32841 case V8SFmode:
32842 half_mode = V4SFmode;
32843 j = 4;
32844 n = 4;
32845 goto half;
32846
32847 case V4DFmode:
32848 half_mode = V2DFmode;
32849 j = 5;
32850 n = 2;
32851 goto half;
32852
32853 half:
32854 /* Compute offset. */
32855 i = elt / n;
32856 elt %= n;
32857
32858 gcc_assert (i <= 1);
32859
32860 /* Extract the half. */
32861 tmp = gen_reg_rtx (half_mode);
32862 emit_insn (gen_extract[j][i] (tmp, target));
32863
32864 /* Put val in tmp at elt. */
32865 ix86_expand_vector_set (false, tmp, val, elt);
32866
32867 /* Put it back. */
32868 emit_insn (gen_insert[j][i] (target, target, tmp));
32869 return;
32870
32871 default:
32872 break;
32873 }
32874
32875 if (use_vec_merge)
32876 {
32877 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
32878 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
32879 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32880 }
32881 else
32882 {
32883 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
32884
32885 emit_move_insn (mem, target);
32886
32887 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
32888 emit_move_insn (tmp, val);
32889
32890 emit_move_insn (target, mem);
32891 }
32892 }
32893
32894 void
32895 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
32896 {
32897 enum machine_mode mode = GET_MODE (vec);
32898 enum machine_mode inner_mode = GET_MODE_INNER (mode);
32899 bool use_vec_extr = false;
32900 rtx tmp;
32901
32902 switch (mode)
32903 {
32904 case V2SImode:
32905 case V2SFmode:
32906 if (!mmx_ok)
32907 break;
32908 /* FALLTHRU */
32909
32910 case V2DFmode:
32911 case V2DImode:
32912 use_vec_extr = true;
32913 break;
32914
32915 case V4SFmode:
32916 use_vec_extr = TARGET_SSE4_1;
32917 if (use_vec_extr)
32918 break;
32919
32920 switch (elt)
32921 {
32922 case 0:
32923 tmp = vec;
32924 break;
32925
32926 case 1:
32927 case 3:
32928 tmp = gen_reg_rtx (mode);
32929 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
32930 GEN_INT (elt), GEN_INT (elt),
32931 GEN_INT (elt+4), GEN_INT (elt+4)));
32932 break;
32933
32934 case 2:
32935 tmp = gen_reg_rtx (mode);
32936 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
32937 break;
32938
32939 default:
32940 gcc_unreachable ();
32941 }
32942 vec = tmp;
32943 use_vec_extr = true;
32944 elt = 0;
32945 break;
32946
32947 case V4SImode:
32948 use_vec_extr = TARGET_SSE4_1;
32949 if (use_vec_extr)
32950 break;
32951
32952 if (TARGET_SSE2)
32953 {
32954 switch (elt)
32955 {
32956 case 0:
32957 tmp = vec;
32958 break;
32959
32960 case 1:
32961 case 3:
32962 tmp = gen_reg_rtx (mode);
32963 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
32964 GEN_INT (elt), GEN_INT (elt),
32965 GEN_INT (elt), GEN_INT (elt)));
32966 break;
32967
32968 case 2:
32969 tmp = gen_reg_rtx (mode);
32970 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
32971 break;
32972
32973 default:
32974 gcc_unreachable ();
32975 }
32976 vec = tmp;
32977 use_vec_extr = true;
32978 elt = 0;
32979 }
32980 else
32981 {
32982 /* For SSE1, we have to reuse the V4SF code. */
32983 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
32984 gen_lowpart (V4SFmode, vec), elt);
32985 return;
32986 }
32987 break;
32988
32989 case V8HImode:
32990 use_vec_extr = TARGET_SSE2;
32991 break;
32992 case V4HImode:
32993 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
32994 break;
32995
32996 case V16QImode:
32997 use_vec_extr = TARGET_SSE4_1;
32998 break;
32999
33000 case V8SFmode:
33001 if (TARGET_AVX)
33002 {
33003 tmp = gen_reg_rtx (V4SFmode);
33004 if (elt < 4)
33005 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
33006 else
33007 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
33008 ix86_expand_vector_extract (false, target, tmp, elt & 3);
33009 return;
33010 }
33011 break;
33012
33013 case V4DFmode:
33014 if (TARGET_AVX)
33015 {
33016 tmp = gen_reg_rtx (V2DFmode);
33017 if (elt < 2)
33018 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
33019 else
33020 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
33021 ix86_expand_vector_extract (false, target, tmp, elt & 1);
33022 return;
33023 }
33024 break;
33025
33026 case V32QImode:
33027 if (TARGET_AVX)
33028 {
33029 tmp = gen_reg_rtx (V16QImode);
33030 if (elt < 16)
33031 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
33032 else
33033 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
33034 ix86_expand_vector_extract (false, target, tmp, elt & 15);
33035 return;
33036 }
33037 break;
33038
33039 case V16HImode:
33040 if (TARGET_AVX)
33041 {
33042 tmp = gen_reg_rtx (V8HImode);
33043 if (elt < 8)
33044 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
33045 else
33046 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
33047 ix86_expand_vector_extract (false, target, tmp, elt & 7);
33048 return;
33049 }
33050 break;
33051
33052 case V8SImode:
33053 if (TARGET_AVX)
33054 {
33055 tmp = gen_reg_rtx (V4SImode);
33056 if (elt < 4)
33057 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
33058 else
33059 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
33060 ix86_expand_vector_extract (false, target, tmp, elt & 3);
33061 return;
33062 }
33063 break;
33064
33065 case V4DImode:
33066 if (TARGET_AVX)
33067 {
33068 tmp = gen_reg_rtx (V2DImode);
33069 if (elt < 2)
33070 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
33071 else
33072 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
33073 ix86_expand_vector_extract (false, target, tmp, elt & 1);
33074 return;
33075 }
33076 break;
33077
33078 case V8QImode:
33079 /* ??? Could extract the appropriate HImode element and shift. */
33080 default:
33081 break;
33082 }
33083
33084 if (use_vec_extr)
33085 {
33086 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
33087 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
33088
33089 /* Let the rtl optimizers know about the zero extension performed. */
33090 if (inner_mode == QImode || inner_mode == HImode)
33091 {
33092 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
33093 target = gen_lowpart (SImode, target);
33094 }
33095
33096 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33097 }
33098 else
33099 {
33100 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
33101
33102 emit_move_insn (mem, vec);
33103
33104 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
33105 emit_move_insn (target, tmp);
33106 }
33107 }
33108
33109 /* Expand a vector reduction. FN is the binary pattern to reduce;
33110 DEST is the destination; IN is the input vector. */
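/* The expansion below applies FN repeatedly to the running vector and a
   shuffled copy of it, halving the number of distinct partial results at
   each step until the reduction is complete.  */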
33111
33112 void
33113 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
33114 {
33115 rtx tmp1, tmp2, tmp3, tmp4, tmp5;
33116 enum machine_mode mode = GET_MODE (in);
33117 int i;
33118
33119 tmp1 = gen_reg_rtx (mode);
33120 tmp2 = gen_reg_rtx (mode);
33121 tmp3 = gen_reg_rtx (mode);
33122
33123 switch (mode)
33124 {
33125 case V4SFmode:
33126 emit_insn (gen_sse_movhlps (tmp1, in, in));
33127 emit_insn (fn (tmp2, tmp1, in));
33128 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
33129 const1_rtx, const1_rtx,
33130 GEN_INT (1+4), GEN_INT (1+4)));
33131 break;
33132 case V8SFmode:
33133 tmp4 = gen_reg_rtx (mode);
33134 tmp5 = gen_reg_rtx (mode);
33135 emit_insn (gen_avx_vperm2f128v8sf3 (tmp4, in, in, const1_rtx));
33136 emit_insn (fn (tmp5, tmp4, in));
33137 emit_insn (gen_avx_shufps256 (tmp1, tmp5, tmp5, GEN_INT (2+12)));
33138 emit_insn (fn (tmp2, tmp1, tmp5));
33139 emit_insn (gen_avx_shufps256 (tmp3, tmp2, tmp2, const1_rtx));
33140 break;
33141 case V4DFmode:
33142 emit_insn (gen_avx_vperm2f128v4df3 (tmp1, in, in, const1_rtx));
33143 emit_insn (fn (tmp2, tmp1, in));
33144 emit_insn (gen_avx_shufpd256 (tmp3, tmp2, tmp2, const1_rtx));
33145 break;
33146 case V32QImode:
33147 case V16HImode:
33148 case V8SImode:
33149 case V4DImode:
33150 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, tmp1),
33151 gen_lowpart (V4DImode, in),
33152 gen_lowpart (V4DImode, in),
33153 const1_rtx));
33154 tmp4 = in;
33155 tmp5 = tmp1;
33156 for (i = 64; i >= GET_MODE_BITSIZE (GET_MODE_INNER (mode)); i >>= 1)
33157 {
33158 if (i != 64)
33159 {
33160 tmp2 = gen_reg_rtx (mode);
33161 tmp3 = gen_reg_rtx (mode);
33162 }
33163 emit_insn (fn (tmp2, tmp4, tmp5));
33164 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, tmp3),
33165 gen_lowpart (V2TImode, tmp2),
33166 GEN_INT (i)));
33167 tmp4 = tmp2;
33168 tmp5 = tmp3;
33169 }
33170 break;
33171 default:
33172 gcc_unreachable ();
33173 }
33174 emit_insn (fn (dest, tmp2, tmp3));
33175 }
33176 \f
33177 /* Target hook for scalar_mode_supported_p. */
33178 static bool
33179 ix86_scalar_mode_supported_p (enum machine_mode mode)
33180 {
33181 if (DECIMAL_FLOAT_MODE_P (mode))
33182 return default_decimal_float_supported_p ();
33183 else if (mode == TFmode)
33184 return true;
33185 else
33186 return default_scalar_mode_supported_p (mode);
33187 }
33188
33189 /* Implements target hook vector_mode_supported_p. */
33190 static bool
33191 ix86_vector_mode_supported_p (enum machine_mode mode)
33192 {
33193 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
33194 return true;
33195 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
33196 return true;
33197 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
33198 return true;
33199 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
33200 return true;
33201 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
33202 return true;
33203 return false;
33204 }
33205
33206 /* Target hook for c_mode_for_suffix. */
33207 static enum machine_mode
33208 ix86_c_mode_for_suffix (char suffix)
33209 {
33210 if (suffix == 'q')
33211 return TFmode;
33212 if (suffix == 'w')
33213 return XFmode;
33214
33215 return VOIDmode;
33216 }
33217
33218 /* Worker function for TARGET_MD_ASM_CLOBBERS.
33219
33220 We do this in the new i386 backend to maintain source compatibility
33221 with the old cc0-based compiler. */
33222
33223 static tree
33224 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
33225 tree inputs ATTRIBUTE_UNUSED,
33226 tree clobbers)
33227 {
33228 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
33229 clobbers);
33230 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
33231 clobbers);
33232 return clobbers;
33233 }
33234
33235 /* Implements the target hook targetm.asm.encode_section_info. */
33236
33237 static void ATTRIBUTE_UNUSED
33238 ix86_encode_section_info (tree decl, rtx rtl, int first)
33239 {
33240 default_encode_section_info (decl, rtl, first);
33241
33242 if (TREE_CODE (decl) == VAR_DECL
33243 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
33244 && ix86_in_large_data_p (decl))
33245 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
33246 }
33247
33248 /* Worker function for REVERSE_CONDITION. */
33249
33250 enum rtx_code
33251 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
33252 {
33253 return (mode != CCFPmode && mode != CCFPUmode
33254 ? reverse_condition (code)
33255 : reverse_condition_maybe_unordered (code));
33256 }
33257
33258 /* Output code to perform an x87 FP register move, from OPERANDS[1]
33259 to OPERANDS[0]. */
33260
33261 const char *
33262 output_387_reg_move (rtx insn, rtx *operands)
33263 {
33264 if (REG_P (operands[0]))
33265 {
33266 if (REG_P (operands[1])
33267 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
33268 {
33269 if (REGNO (operands[0]) == FIRST_STACK_REG)
33270 return output_387_ffreep (operands, 0);
33271 return "fstp\t%y0";
33272 }
33273 if (STACK_TOP_P (operands[0]))
33274 return "fld%Z1\t%y1";
33275 return "fst\t%y0";
33276 }
33277 else if (MEM_P (operands[0]))
33278 {
33279 gcc_assert (REG_P (operands[1]));
33280 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
33281 return "fstp%Z0\t%y0";
33282 else
33283 {
33284 /* There is no non-popping store to memory for XFmode.
33285 So if we need one, follow the store with a load. */
33286 if (GET_MODE (operands[0]) == XFmode)
33287 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
33288 else
33289 return "fst%Z0\t%y0";
33290 }
33291 }
33292 else
33293 gcc_unreachable();
33294 }
33295
33296 /* Output code to perform a conditional jump to LABEL, if the C2 flag in the
33297 FP status register is set. */
33298
33299 void
33300 ix86_emit_fp_unordered_jump (rtx label)
33301 {
33302 rtx reg = gen_reg_rtx (HImode);
33303 rtx temp;
33304
33305 emit_insn (gen_x86_fnstsw_1 (reg));
33306
33307 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
33308 {
33309 emit_insn (gen_x86_sahf_1 (reg));
33310
33311 temp = gen_rtx_REG (CCmode, FLAGS_REG);
33312 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
33313 }
33314 else
33315 {
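      /* C2 is bit 10 of the FP status word; after fnstsw it is bit 2 of
	 the high byte, hence the 0x04 mask.  */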
33316 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
33317
33318 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
33319 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
33320 }
33321
33322 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
33323 gen_rtx_LABEL_REF (VOIDmode, label),
33324 pc_rtx);
33325 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
33326
33327 emit_jump_insn (temp);
33328 predict_jump (REG_BR_PROB_BASE * 10 / 100);
33329 }
33330
33331 /* Output code to perform a log1p XFmode calculation. */
33332
33333 void ix86_emit_i387_log1p (rtx op0, rtx op1)
33334 {
33335 rtx label1 = gen_label_rtx ();
33336 rtx label2 = gen_label_rtx ();
33337
33338 rtx tmp = gen_reg_rtx (XFmode);
33339 rtx tmp2 = gen_reg_rtx (XFmode);
33340 rtx test;
33341
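  /* fyl2xp1 is only specified for |op1| < 1 - sqrt(2)/2 (about 0.29289);
     for larger magnitudes branch to the fyl2x form applied to 1.0 + op1.  */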
33342 emit_insn (gen_absxf2 (tmp, op1));
33343 test = gen_rtx_GE (VOIDmode, tmp,
33344 CONST_DOUBLE_FROM_REAL_VALUE (
33345 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
33346 XFmode));
33347 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
33348
33349 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
33350 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
33351 emit_jump (label2);
33352
33353 emit_label (label1);
33354 emit_move_insn (tmp, CONST1_RTX (XFmode));
33355 emit_insn (gen_addxf3 (tmp, op1, tmp));
33356 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
33357 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
33358
33359 emit_label (label2);
33360 }
33361
33362 /* Emit code for a round calculation: round to nearest, with halfway cases rounded away from zero. */
33363 void ix86_emit_i387_round (rtx op0, rtx op1)
33364 {
33365 enum machine_mode inmode = GET_MODE (op1);
33366 enum machine_mode outmode = GET_MODE (op0);
33367 rtx e1, e2, res, tmp, tmp1, half;
33368 rtx scratch = gen_reg_rtx (HImode);
33369 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
33370 rtx jump_label = gen_label_rtx ();
33371 rtx insn;
33372 rtx (*gen_abs) (rtx, rtx);
33373 rtx (*gen_neg) (rtx, rtx);
33374
33375 switch (inmode)
33376 {
33377 case SFmode:
33378 gen_abs = gen_abssf2;
33379 break;
33380 case DFmode:
33381 gen_abs = gen_absdf2;
33382 break;
33383 case XFmode:
33384 gen_abs = gen_absxf2;
33385 break;
33386 default:
33387 gcc_unreachable ();
33388 }
33389
33390 switch (outmode)
33391 {
33392 case SFmode:
33393 gen_neg = gen_negsf2;
33394 break;
33395 case DFmode:
33396 gen_neg = gen_negdf2;
33397 break;
33398 case XFmode:
33399 gen_neg = gen_negxf2;
33400 break;
33401 case HImode:
33402 gen_neg = gen_neghi2;
33403 break;
33404 case SImode:
33405 gen_neg = gen_negsi2;
33406 break;
33407 case DImode:
33408 gen_neg = gen_negdi2;
33409 break;
33410 default:
33411 gcc_unreachable ();
33412 }
33413
33414 e1 = gen_reg_rtx (inmode);
33415 e2 = gen_reg_rtx (inmode);
33416 res = gen_reg_rtx (outmode);
33417
33418 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
33419
33420 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
33421
33422 /* scratch = fxam(op1) */
33423 emit_insn (gen_rtx_SET (VOIDmode, scratch,
33424 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
33425 UNSPEC_FXAM)));
33426 /* e1 = fabs(op1) */
33427 emit_insn (gen_abs (e1, op1));
33428
33429 /* e2 = e1 + 0.5 */
33430 half = force_reg (inmode, half);
33431 emit_insn (gen_rtx_SET (VOIDmode, e2,
33432 gen_rtx_PLUS (inmode, e1, half)));
33433
33434 /* res = floor(e2) */
33435 if (inmode != XFmode)
33436 {
33437 tmp1 = gen_reg_rtx (XFmode);
33438
33439 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
33440 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
33441 }
33442 else
33443 tmp1 = e2;
33444
33445 switch (outmode)
33446 {
33447 case SFmode:
33448 case DFmode:
33449 {
33450 rtx tmp0 = gen_reg_rtx (XFmode);
33451
33452 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
33453
33454 emit_insn (gen_rtx_SET (VOIDmode, res,
33455 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
33456 UNSPEC_TRUNC_NOOP)));
33457 }
33458 break;
33459 case XFmode:
33460 emit_insn (gen_frndintxf2_floor (res, tmp1));
33461 break;
33462 case HImode:
33463 emit_insn (gen_lfloorxfhi2 (res, tmp1));
33464 break;
33465 case SImode:
33466 emit_insn (gen_lfloorxfsi2 (res, tmp1));
33467 break;
33468 case DImode:
33469 emit_insn (gen_lfloorxfdi2 (res, tmp1));
33470 break;
33471 default:
33472 gcc_unreachable ();
33473 }
33474
33475 /* flags = signbit(a) */
33476 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
33477
33478 /* if (flags) then res = -res */
33479 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
33480 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
33481 gen_rtx_LABEL_REF (VOIDmode, jump_label),
33482 pc_rtx);
33483 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
33484 predict_jump (REG_BR_PROB_BASE * 50 / 100);
33485 JUMP_LABEL (insn) = jump_label;
33486
33487 emit_insn (gen_neg (res, res));
33488
33489 emit_label (jump_label);
33490 LABEL_NUSES (jump_label) = 1;
33491
33492 emit_move_insn (op0, res);
33493 }
33494
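/* A minimal scalar C model of the sequence above, for the case where
   OP0 has a floating-point mode (a sketch, assuming <math.h>; signbit
   stands in for the fxam sign bit so that negative inputs, including
   -0.0, are negated back):

     #include <math.h>

     double i387_round_model (double a)
     {
       double r = floor (fabs (a) + 0.5);
       return signbit (a) ? -r : r;
     }
*/
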
33495 /* Output code to perform a Newton-Raphson approximation of a single precision
33496 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
33497
33498 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
33499 {
33500 rtx x0, x1, e0, e1;
33501
33502 x0 = gen_reg_rtx (mode);
33503 e0 = gen_reg_rtx (mode);
33504 e1 = gen_reg_rtx (mode);
33505 x1 = gen_reg_rtx (mode);
33506
33507 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
33508
33509 /* x0 = rcp(b) estimate */
33510 emit_insn (gen_rtx_SET (VOIDmode, x0,
33511 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
33512 UNSPEC_RCP)));
33513 /* e0 = x0 * b */
33514 emit_insn (gen_rtx_SET (VOIDmode, e0,
33515 gen_rtx_MULT (mode, x0, b)));
33516
33517 /* e0 = x0 * e0 */
33518 emit_insn (gen_rtx_SET (VOIDmode, e0,
33519 gen_rtx_MULT (mode, x0, e0)));
33520
33521 /* e1 = x0 + x0 */
33522 emit_insn (gen_rtx_SET (VOIDmode, e1,
33523 gen_rtx_PLUS (mode, x0, x0)));
33524
33525 /* x1 = e1 - e0 */
33526 emit_insn (gen_rtx_SET (VOIDmode, x1,
33527 gen_rtx_MINUS (mode, e1, e0)));
33528
33529 /* res = a * x1 */
33530 emit_insn (gen_rtx_SET (VOIDmode, res,
33531 gen_rtx_MULT (mode, a, x1)));
33532 }
33533
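/* A runnable scalar sketch of the same Newton-Raphson step, using the
   rcpss intrinsic for the initial estimate (illustrative only; the
   helper name swdiv_model is not part of this file):

     #include <xmmintrin.h>

     static float
     swdiv_model (float a, float b)
     {
       float x0 = _mm_cvtss_f32 (_mm_rcp_ss (_mm_set_ss (b)));  /* ~12-bit rcp(b) */
       float e0 = (x0 * b) * x0;   /* b * rcp(b) * rcp(b) */
       float e1 = x0 + x0;         /* rcp(b) + rcp(b) */
       return a * (e1 - e0);       /* one refinement step */
     }
*/
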
33534 /* Output code to perform a Newton-Raphson approximation of a
33535 single precision floating point [reciprocal] square root. */
33536
33537 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
33538 bool recip)
33539 {
33540 rtx x0, e0, e1, e2, e3, mthree, mhalf;
33541 REAL_VALUE_TYPE r;
33542
33543 x0 = gen_reg_rtx (mode);
33544 e0 = gen_reg_rtx (mode);
33545 e1 = gen_reg_rtx (mode);
33546 e2 = gen_reg_rtx (mode);
33547 e3 = gen_reg_rtx (mode);
33548
33549 real_from_integer (&r, VOIDmode, -3, -1, 0);
33550 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
33551
33552 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
33553 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
33554
33555 if (VECTOR_MODE_P (mode))
33556 {
33557 mthree = ix86_build_const_vector (mode, true, mthree);
33558 mhalf = ix86_build_const_vector (mode, true, mhalf);
33559 }
33560
33561 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
33562 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
33563
33564 /* x0 = rsqrt(a) estimate */
33565 emit_insn (gen_rtx_SET (VOIDmode, x0,
33566 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
33567 UNSPEC_RSQRT)));
33568
33569 /* If a == 0.0, filter out the infinite rsqrt estimate to avoid NaN for sqrt (0.0). */
33570 if (!recip)
33571 {
33572 rtx zero, mask;
33573
33574 zero = gen_reg_rtx (mode);
33575 mask = gen_reg_rtx (mode);
33576
33577 zero = force_reg (mode, CONST0_RTX(mode));
33578 emit_insn (gen_rtx_SET (VOIDmode, mask,
33579 gen_rtx_NE (mode, zero, a)));
33580
33581 emit_insn (gen_rtx_SET (VOIDmode, x0,
33582 gen_rtx_AND (mode, x0, mask)));
33583 }
33584
33585 /* e0 = x0 * a */
33586 emit_insn (gen_rtx_SET (VOIDmode, e0,
33587 gen_rtx_MULT (mode, x0, a)));
33588 /* e1 = e0 * x0 */
33589 emit_insn (gen_rtx_SET (VOIDmode, e1,
33590 gen_rtx_MULT (mode, e0, x0)));
33591
33592 /* e2 = e1 - 3.0 (mthree is -3.0, so a PLUS does the subtraction). */
33593 mthree = force_reg (mode, mthree);
33594 emit_insn (gen_rtx_SET (VOIDmode, e2,
33595 gen_rtx_PLUS (mode, e1, mthree)));
33596
33597 mhalf = force_reg (mode, mhalf);
33598 if (recip)
33599 /* e3 = -.5 * x0 */
33600 emit_insn (gen_rtx_SET (VOIDmode, e3,
33601 gen_rtx_MULT (mode, x0, mhalf)));
33602 else
33603 /* e3 = -.5 * e0 */
33604 emit_insn (gen_rtx_SET (VOIDmode, e3,
33605 gen_rtx_MULT (mode, e0, mhalf)));
33606 /* ret = e2 * e3 */
33607 emit_insn (gen_rtx_SET (VOIDmode, res,
33608 gen_rtx_MULT (mode, e2, e3)));
33609 }
33610
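/* A scalar sketch of the refinement above (illustrative; rsqrtss supplies
   the estimate, and the a == 0.0 masking used on the sqrt path is written
   as an explicit test):

     #include <xmmintrin.h>

     static float
     swsqrt_model (float a, int recip)
     {
       float x0 = _mm_cvtss_f32 (_mm_rsqrt_ss (_mm_set_ss (a)));
       if (!recip && a == 0.0f)
         x0 = 0.0f;                        /* avoid inf * 0 -> NaN for sqrt (0.0) */
       float e0 = x0 * a;                  /* a * rsqrt(a) */
       float e1 = e0 * x0;                 /* a * rsqrt(a) * rsqrt(a) */
       float e2 = e1 - 3.0f;
       float e3 = -0.5f * (recip ? x0 : e0);
       return e2 * e3;                     /* sqrt(a) or rsqrt(a) */
     }
*/
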
33611 #ifdef TARGET_SOLARIS
33612 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
33613
33614 static void
33615 i386_solaris_elf_named_section (const char *name, unsigned int flags,
33616 tree decl)
33617 {
33618 /* With Binutils 2.15, the "@unwind" marker must be specified on
33619 every occurrence of the ".eh_frame" section, not just the first
33620 one. */
33621 if (TARGET_64BIT
33622 && strcmp (name, ".eh_frame") == 0)
33623 {
33624 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
33625 flags & SECTION_WRITE ? "aw" : "a");
33626 return;
33627 }
33628
33629 #ifndef USE_GAS
33630 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
33631 {
33632 solaris_elf_asm_comdat_section (name, flags, decl);
33633 return;
33634 }
33635 #endif
33636
33637 default_elf_asm_named_section (name, flags, decl);
33638 }
33639 #endif /* TARGET_SOLARIS */
33640
33641 /* Return the mangling of TYPE if it is an extended fundamental type. */
33642
33643 static const char *
33644 ix86_mangle_type (const_tree type)
33645 {
33646 type = TYPE_MAIN_VARIANT (type);
33647
33648 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
33649 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
33650 return NULL;
33651
33652 switch (TYPE_MODE (type))
33653 {
33654 case TFmode:
33655 /* __float128 is "g". */
33656 return "g";
33657 case XFmode:
33658 /* "long double" or __float80 is "e". */
33659 return "e";
33660 default:
33661 return NULL;
33662 }
33663 }
33664
33665 /* For 32-bit code we can save PIC register setup by using
33666 __stack_chk_fail_local hidden function instead of calling
33667 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
33668 register, so it is better to call __stack_chk_fail directly. */
33669
33670 static tree ATTRIBUTE_UNUSED
33671 ix86_stack_protect_fail (void)
33672 {
33673 return TARGET_64BIT
33674 ? default_external_stack_protect_fail ()
33675 : default_hidden_stack_protect_fail ();
33676 }
33677
33678 /* Select a format to encode pointers in exception handling data. CODE
33679 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
33680 true if the symbol may be affected by dynamic relocations.
33681
33682 ??? All x86 object file formats are capable of representing this.
33683 After all, the relocation needed is the same as for the call insn.
33684 Whether a particular assembler actually accepts such an encoding
33685 remains to be seen. */
33686 int
33687 asm_preferred_eh_data_format (int code, int global)
33688 {
33689 if (flag_pic)
33690 {
33691 int type = DW_EH_PE_sdata8;
33692 if (!TARGET_64BIT
33693 || ix86_cmodel == CM_SMALL_PIC
33694 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
33695 type = DW_EH_PE_sdata4;
33696 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
33697 }
33698 if (ix86_cmodel == CM_SMALL
33699 || (ix86_cmodel == CM_MEDIUM && code))
33700 return DW_EH_PE_udata4;
33701 return DW_EH_PE_absptr;
33702 }
33703 \f
33704 /* Expand copysign from SIGN to the positive value ABS_VALUE
33705 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
33706 the sign-bit. */
33707 static void
33708 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
33709 {
33710 enum machine_mode mode = GET_MODE (sign);
33711 rtx sgn = gen_reg_rtx (mode);
33712 if (mask == NULL_RTX)
33713 {
33714 enum machine_mode vmode;
33715
33716 if (mode == SFmode)
33717 vmode = V4SFmode;
33718 else if (mode == DFmode)
33719 vmode = V2DFmode;
33720 else
33721 vmode = mode;
33722
33723 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
33724 if (!VECTOR_MODE_P (mode))
33725 {
33726 /* We need to generate a scalar mode mask in this case. */
33727 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
33728 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
33729 mask = gen_reg_rtx (mode);
33730 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
33731 }
33732 }
33733 else
33734 mask = gen_rtx_NOT (mode, mask);
33735 emit_insn (gen_rtx_SET (VOIDmode, sgn,
33736 gen_rtx_AND (mode, mask, sign)));
33737 emit_insn (gen_rtx_SET (VOIDmode, result,
33738 gen_rtx_IOR (mode, abs_value, sgn)));
33739 }
33740
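/* Bitwise, the and/or pair above computes result = abs_value | (sign & S)
   where S is the sign-bit mask.  A scalar model for DFmode (a sketch,
   assuming <stdint.h> and <string.h>; not part of this file):

     #include <stdint.h>
     #include <string.h>

     static double
     copysign_to_positive_model (double abs_value, double sign)
     {
       uint64_t a, s;
       memcpy (&a, &abs_value, sizeof a);
       memcpy (&s, &sign, sizeof s);
       a |= s & UINT64_C (0x8000000000000000);
       memcpy (&abs_value, &a, sizeof a);
       return abs_value;
     }
*/
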
33741 /* Expand fabs (OP0) and return a new rtx that holds the result. The
33742 mask for masking out the sign-bit is stored in *SMASK, if that is
33743 non-null. */
33744 static rtx
33745 ix86_expand_sse_fabs (rtx op0, rtx *smask)
33746 {
33747 enum machine_mode vmode, mode = GET_MODE (op0);
33748 rtx xa, mask;
33749
33750 xa = gen_reg_rtx (mode);
33751 if (mode == SFmode)
33752 vmode = V4SFmode;
33753 else if (mode == DFmode)
33754 vmode = V2DFmode;
33755 else
33756 vmode = mode;
33757 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
33758 if (!VECTOR_MODE_P (mode))
33759 {
33760 /* We need to generate a scalar mode mask in this case. */
33761 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
33762 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
33763 mask = gen_reg_rtx (mode);
33764 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
33765 }
33766 emit_insn (gen_rtx_SET (VOIDmode, xa,
33767 gen_rtx_AND (mode, op0, mask)));
33768
33769 if (smask)
33770 *smask = mask;
33771
33772 return xa;
33773 }
33774
33775 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
33776 swapping the operands if SWAP_OPERANDS is true. The expanded
33777 code is a forward jump to a newly created label in case the
33778 comparison is true. The generated label rtx is returned. */
33779 static rtx
33780 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
33781 bool swap_operands)
33782 {
33783 rtx label, tmp;
33784
33785 if (swap_operands)
33786 {
33787 tmp = op0;
33788 op0 = op1;
33789 op1 = tmp;
33790 }
33791
33792 label = gen_label_rtx ();
33793 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
33794 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33795 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
33796 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
33797 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
33798 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
33799 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
33800 JUMP_LABEL (tmp) = label;
33801
33802 return label;
33803 }
33804
33805 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
33806 using comparison code CODE. Operands are swapped for the comparison if
33807 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
33808 static rtx
33809 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
33810 bool swap_operands)
33811 {
33812 rtx (*insn)(rtx, rtx, rtx, rtx);
33813 enum machine_mode mode = GET_MODE (op0);
33814 rtx mask = gen_reg_rtx (mode);
33815
33816 if (swap_operands)
33817 {
33818 rtx tmp = op0;
33819 op0 = op1;
33820 op1 = tmp;
33821 }
33822
33823 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
33824
33825 emit_insn (insn (mask, op0, op1,
33826 gen_rtx_fmt_ee (code, mode, op0, op1)));
33827 return mask;
33828 }
33829
33830 /* Generate and return a rtx of mode MODE for 2**n where n is the number
33831 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
33832 static rtx
33833 ix86_gen_TWO52 (enum machine_mode mode)
33834 {
33835 REAL_VALUE_TYPE TWO52r;
33836 rtx TWO52;
33837
33838 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
33839 TWO52 = const_double_from_real_value (TWO52r, mode);
33840 TWO52 = force_reg (mode, TWO52);
33841
33842 return TWO52;
33843 }
33844
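/* Why the x + TWO52 - TWO52 idiom used by the expanders below rounds to
   an integer (illustrative): for a nonnegative double x < 2**52, the sum
   x + 2**52 lies in [2**52, 2**53), where consecutive doubles are exactly
   1.0 apart, so the addition rounds x to an integer in the current
   rounding mode; subtracting 2**52 then recovers that integer exactly.
   E.g. 3.7 + 2**52 rounds to 2**52 + 4.0, and subtracting 2**52 yields
   4.0.  The same holds for SFmode with 2**23.  This is why every caller
   first takes fabs and guards the fast path with isless (xa, TWO52).  */
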
33845 /* Expand SSE sequence for computing lround from OP1 storing
33846 into OP0. */
33847 void
33848 ix86_expand_lround (rtx op0, rtx op1)
33849 {
33850 /* C code for the stuff we're doing below:
33851 tmp = op1 + copysign (nextafter (0.5, 0.0), op1);
33852 return (long)tmp;
33853 */
33854 enum machine_mode mode = GET_MODE (op1);
33855 const struct real_format *fmt;
33856 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
33857 rtx adj;
33858
33859 /* load nextafter (0.5, 0.0) */
33860 fmt = REAL_MODE_FORMAT (mode);
33861 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
33862 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
33863
33864 /* adj = copysign (0.5, op1) */
33865 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
33866 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
33867
33868 /* adj = op1 + adj */
33869 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
33870
33871 /* op0 = (imode)adj */
33872 expand_fix (op0, adj, 0);
33873 }
33874
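/* Why nextafter (0.5, 0.0) rather than 0.5 (a worked double-precision
   example): for x = nextafter (0.5, 0.0) = 0.5 - 2**-54, adding exactly
   0.5 gives 1 - 2**-54, which round-to-nearest-even rounds up to 1.0, so
   the truncation would return 1 even though lround (x) is 0.  Adding
   pred(0.5) = 0.5 - 2**-54 instead gives 1 - 2**-53, which is exactly
   representable, and the truncation correctly yields 0.  */
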
33875 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
33876 storing into OPERAND0. */
33877 void
33878 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
33879 {
33880 /* C code for the stuff we're doing below (for do_floor):
33881 xi = (long)op1;
33882 xi -= (double)xi > op1 ? 1 : 0;
33883 return xi;
33884 */
33885 enum machine_mode fmode = GET_MODE (op1);
33886 enum machine_mode imode = GET_MODE (op0);
33887 rtx ireg, freg, label, tmp;
33888
33889 /* reg = (long)op1 */
33890 ireg = gen_reg_rtx (imode);
33891 expand_fix (ireg, op1, 0);
33892
33893 /* freg = (double)reg */
33894 freg = gen_reg_rtx (fmode);
33895 expand_float (freg, ireg, 0);
33896
33897 /* ireg = (freg > op1) ? ireg - 1 : ireg */
33898 label = ix86_expand_sse_compare_and_jump (UNLE,
33899 freg, op1, !do_floor);
33900 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
33901 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
33902 emit_move_insn (ireg, tmp);
33903
33904 emit_label (label);
33905 LABEL_NUSES (label) = 1;
33906
33907 emit_move_insn (op0, ireg);
33908 }
33909
33910 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
33911 result in OPERAND0. */
33912 void
33913 ix86_expand_rint (rtx operand0, rtx operand1)
33914 {
33915 /* C code for the stuff we're doing below:
33916 xa = fabs (operand1);
33917 if (!isless (xa, 2**52))
33918 return operand1;
33919 xa = xa + 2**52 - 2**52;
33920 return copysign (xa, operand1);
33921 */
33922 enum machine_mode mode = GET_MODE (operand0);
33923 rtx res, xa, label, TWO52, mask;
33924
33925 res = gen_reg_rtx (mode);
33926 emit_move_insn (res, operand1);
33927
33928 /* xa = abs (operand1) */
33929 xa = ix86_expand_sse_fabs (res, &mask);
33930
33931 /* if (!isless (xa, TWO52)) goto label; */
33932 TWO52 = ix86_gen_TWO52 (mode);
33933 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33934
33935 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33936 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
33937
33938 ix86_sse_copysign_to_positive (res, xa, res, mask);
33939
33940 emit_label (label);
33941 LABEL_NUSES (label) = 1;
33942
33943 emit_move_insn (operand0, res);
33944 }
33945
33946 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
33947 into OPERAND0, without relying on DImode truncation via cvttsd2siq. */
33948 void
33949 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
33950 {
33951 /* C code for the stuff we expand below.
33952 double xa = fabs (x), x2;
33953 if (!isless (xa, TWO52))
33954 return x;
33955 xa = xa + TWO52 - TWO52;
33956 x2 = copysign (xa, x);
33957 Compensate. Floor:
33958 if (x2 > x)
33959 x2 -= 1;
33960 Compensate. Ceil:
33961 if (x2 < x)
33962 x2 -= -1;
33963 return x2;
33964 */
33965 enum machine_mode mode = GET_MODE (operand0);
33966 rtx xa, TWO52, tmp, label, one, res, mask;
33967
33968 TWO52 = ix86_gen_TWO52 (mode);
33969
33970 /* Temporary for holding the result, initialized to the input
33971 operand to ease control flow. */
33972 res = gen_reg_rtx (mode);
33973 emit_move_insn (res, operand1);
33974
33975 /* xa = abs (operand1) */
33976 xa = ix86_expand_sse_fabs (res, &mask);
33977
33978 /* if (!isless (xa, TWO52)) goto label; */
33979 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33980
33981 /* xa = xa + TWO52 - TWO52; */
33982 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33983 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
33984
33985 /* xa = copysign (xa, operand1) */
33986 ix86_sse_copysign_to_positive (xa, xa, res, mask);
33987
33988 /* generate 1.0 or -1.0 */
33989 one = force_reg (mode,
33990 const_double_from_real_value (do_floor
33991 ? dconst1 : dconstm1, mode));
33992
33993 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
33994 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
33995 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33996 gen_rtx_AND (mode, one, tmp)));
33997 /* We always need to subtract here to preserve signed zero. */
33998 tmp = expand_simple_binop (mode, MINUS,
33999 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
34000 emit_move_insn (res, tmp);
34001
34002 emit_label (label);
34003 LABEL_NUSES (label) = 1;
34004
34005 emit_move_insn (operand0, res);
34006 }
34007
34008 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
34009 into OPERAND0. */
34010 void
34011 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
34012 {
34013 /* C code for the stuff we expand below.
34014 double xa = fabs (x), x2;
34015 if (!isless (xa, TWO52))
34016 return x;
34017 x2 = (double)(long)x;
34018 Compensate. Floor:
34019 if (x2 > x)
34020 x2 -= 1;
34021 Compensate. Ceil:
34022 if (x2 < x)
34023 x2 += 1;
34024 if (HONOR_SIGNED_ZEROS (mode))
34025 return copysign (x2, x);
34026 return x2;
34027 */
34028 enum machine_mode mode = GET_MODE (operand0);
34029 rtx xa, xi, TWO52, tmp, label, one, res, mask;
34030
34031 TWO52 = ix86_gen_TWO52 (mode);
34032
34033 /* Temporary for holding the result, initialized to the input
34034 operand to ease control flow. */
34035 res = gen_reg_rtx (mode);
34036 emit_move_insn (res, operand1);
34037
34038 /* xa = abs (operand1) */
34039 xa = ix86_expand_sse_fabs (res, &mask);
34040
34041 /* if (!isless (xa, TWO52)) goto label; */
34042 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34043
34044 /* xa = (double)(long)x */
34045 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
34046 expand_fix (xi, res, 0);
34047 expand_float (xa, xi, 0);
34048
34049 /* generate 1.0 */
34050 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
34051
34052 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
34053 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
34054 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34055 gen_rtx_AND (mode, one, tmp)));
34056 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
34057 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
34058 emit_move_insn (res, tmp);
34059
34060 if (HONOR_SIGNED_ZEROS (mode))
34061 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
34062
34063 emit_label (label);
34064 LABEL_NUSES (label) = 1;
34065
34066 emit_move_insn (operand0, res);
34067 }
34068
34069 /* Expand SSE sequence for computing round from OPERAND1 storing
34070 into OPERAND0. Sequence that works without relying on DImode truncation
34071 via cvttsd2siq, which is only available on 64bit targets. */
34072 void
34073 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
34074 {
34075 /* C code for the stuff we expand below.
34076 double xa = fabs (x), xa2, x2;
34077 if (!isless (xa, TWO52))
34078 return x;
34079 Using the absolute value and copying back sign makes
34080 -0.0 -> -0.0 correct.
34081 xa2 = xa + TWO52 - TWO52;
34082 Compensate.
34083 dxa = xa2 - xa;
34084 if (dxa <= -0.5)
34085 xa2 += 1;
34086 else if (dxa > 0.5)
34087 xa2 -= 1;
34088 x2 = copysign (xa2, x);
34089 return x2;
34090 */
34091 enum machine_mode mode = GET_MODE (operand0);
34092 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
34093
34094 TWO52 = ix86_gen_TWO52 (mode);
34095
34096 /* Temporary for holding the result, initialized to the input
34097 operand to ease control flow. */
34098 res = gen_reg_rtx (mode);
34099 emit_move_insn (res, operand1);
34100
34101 /* xa = abs (operand1) */
34102 xa = ix86_expand_sse_fabs (res, &mask);
34103
34104 /* if (!isless (xa, TWO52)) goto label; */
34105 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34106
34107 /* xa2 = xa + TWO52 - TWO52; */
34108 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
34109 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
34110
34111 /* dxa = xa2 - xa; */
34112 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
34113
34114 /* generate 0.5, 1.0 and -0.5 */
34115 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
34116 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
34117 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
34118 0, OPTAB_DIRECT);
34119
34120 /* Compensate. */
34121 tmp = gen_reg_rtx (mode);
34122 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
34123 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
34124 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34125 gen_rtx_AND (mode, one, tmp)));
34126 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
34127 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
34128 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
34129 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34130 gen_rtx_AND (mode, one, tmp)));
34131 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
34132
34133 /* res = copysign (xa2, operand1) */
34134 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
34135
34136 emit_label (label);
34137 LABEL_NUSES (label) = 1;
34138
34139 emit_move_insn (operand0, res);
34140 }
34141
34142 /* Expand SSE sequence for computing trunc from OPERAND1 storing
34143 into OPERAND0. */
34144 void
34145 ix86_expand_trunc (rtx operand0, rtx operand1)
34146 {
34147 /* C code for SSE variant we expand below.
34148 double xa = fabs (x), x2;
34149 if (!isless (xa, TWO52))
34150 return x;
34151 x2 = (double)(long)x;
34152 if (HONOR_SIGNED_ZEROS (mode))
34153 return copysign (x2, x);
34154 return x2;
34155 */
34156 enum machine_mode mode = GET_MODE (operand0);
34157 rtx xa, xi, TWO52, label, res, mask;
34158
34159 TWO52 = ix86_gen_TWO52 (mode);
34160
34161 /* Temporary for holding the result, initialized to the input
34162 operand to ease control flow. */
34163 res = gen_reg_rtx (mode);
34164 emit_move_insn (res, operand1);
34165
34166 /* xa = abs (operand1) */
34167 xa = ix86_expand_sse_fabs (res, &mask);
34168
34169 /* if (!isless (xa, TWO52)) goto label; */
34170 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34171
34172 /* x = (double)(long)x */
34173 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
34174 expand_fix (xi, res, 0);
34175 expand_float (res, xi, 0);
34176
34177 if (HONOR_SIGNED_ZEROS (mode))
34178 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
34179
34180 emit_label (label);
34181 LABEL_NUSES (label) = 1;
34182
34183 emit_move_insn (operand0, res);
34184 }
34185
34186 /* Expand SSE sequence for computing trunc from OPERAND1 storing
34187 into OPERAND0, without relying on DImode truncation via cvttsd2siq. */
34188 void
34189 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
34190 {
34191 enum machine_mode mode = GET_MODE (operand0);
34192 rtx xa, mask, TWO52, label, one, res, smask, tmp;
34193
34194 /* C code for SSE variant we expand below.
34195 double xa = fabs (x), x2;
34196 if (!isless (xa, TWO52))
34197 return x;
34198 xa2 = xa + TWO52 - TWO52;
34199 Compensate:
34200 if (xa2 > xa)
34201 xa2 -= 1.0;
34202 x2 = copysign (xa2, x);
34203 return x2;
34204 */
34205
34206 TWO52 = ix86_gen_TWO52 (mode);
34207
34208 /* Temporary for holding the result, initialized to the input
34209 operand to ease control flow. */
34210 res = gen_reg_rtx (mode);
34211 emit_move_insn (res, operand1);
34212
34213 /* xa = abs (operand1) */
34214 xa = ix86_expand_sse_fabs (res, &smask);
34215
34216 /* if (!isless (xa, TWO52)) goto label; */
34217 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34218
34219 /* res = xa + TWO52 - TWO52; */
34220 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
34221 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
34222 emit_move_insn (res, tmp);
34223
34224 /* generate 1.0 */
34225 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
34226
34227 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
34228 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
34229 emit_insn (gen_rtx_SET (VOIDmode, mask,
34230 gen_rtx_AND (mode, mask, one)));
34231 tmp = expand_simple_binop (mode, MINUS,
34232 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
34233 emit_move_insn (res, tmp);
34234
34235 /* res = copysign (res, operand1) */
34236 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
34237
34238 emit_label (label);
34239 LABEL_NUSES (label) = 1;
34240
34241 emit_move_insn (operand0, res);
34242 }
34243
34244 /* Expand SSE sequence for computing round from OPERAND1 storing
34245 into OPERAND0. */
34246 void
34247 ix86_expand_round (rtx operand0, rtx operand1)
34248 {
34249 /* C code for the stuff we're doing below:
34250 double xa = fabs (x);
34251 if (!isless (xa, TWO52))
34252 return x;
34253 xa = (double)(long)(xa + nextafter (0.5, 0.0));
34254 return copysign (xa, x);
34255 */
34256 enum machine_mode mode = GET_MODE (operand0);
34257 rtx res, TWO52, xa, label, xi, half, mask;
34258 const struct real_format *fmt;
34259 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
34260
34261 /* Temporary for holding the result, initialized to the input
34262 operand to ease control flow. */
34263 res = gen_reg_rtx (mode);
34264 emit_move_insn (res, operand1);
34265
34266 TWO52 = ix86_gen_TWO52 (mode);
34267 xa = ix86_expand_sse_fabs (res, &mask);
34268 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34269
34270 /* load nextafter (0.5, 0.0) */
34271 fmt = REAL_MODE_FORMAT (mode);
34272 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
34273 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
34274
34275 /* xa = xa + 0.5 */
34276 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
34277 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
34278
34279 /* xa = (double)(int64_t)xa */
34280 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
34281 expand_fix (xi, xa, 0);
34282 expand_float (xa, xi, 0);
34283
34284 /* res = copysign (xa, operand1) */
34285 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
34286
34287 emit_label (label);
34288 LABEL_NUSES (label) = 1;
34289
34290 emit_move_insn (operand0, res);
34291 }
34292
34293 /* Expand SSE sequence for computing round
34294 from OP1 storing into OP0 using sse4 round insn. */
34295 void
34296 ix86_expand_round_sse4 (rtx op0, rtx op1)
34297 {
34298 enum machine_mode mode = GET_MODE (op0);
34299 rtx e1, e2, res, half;
34300 const struct real_format *fmt;
34301 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
34302 rtx (*gen_copysign) (rtx, rtx, rtx);
34303 rtx (*gen_round) (rtx, rtx, rtx);
34304
34305 switch (mode)
34306 {
34307 case SFmode:
34308 gen_copysign = gen_copysignsf3;
34309 gen_round = gen_sse4_1_roundsf2;
34310 break;
34311 case DFmode:
34312 gen_copysign = gen_copysigndf3;
34313 gen_round = gen_sse4_1_rounddf2;
34314 break;
34315 default:
34316 gcc_unreachable ();
34317 }
34318
34319 /* round (a) = trunc (a + copysign (0.5, a)) */
34320
34321 /* load nextafter (0.5, 0.0) */
34322 fmt = REAL_MODE_FORMAT (mode);
34323 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
34324 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
34325 half = const_double_from_real_value (pred_half, mode);
34326
34327 /* e1 = copysign (0.5, op1) */
34328 e1 = gen_reg_rtx (mode);
34329 emit_insn (gen_copysign (e1, half, op1));
34330
34331 /* e2 = op1 + e1 */
34332 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
34333
34334 /* res = trunc (e2) */
34335 res = gen_reg_rtx (mode);
34336 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
34337
34338 emit_move_insn (op0, res);
34339 }
34340 \f
34341
34342 /* Table of valid machine attributes. */
34343 static const struct attribute_spec ix86_attribute_table[] =
34344 {
34345 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
34346 affects_type_identity } */
34347 /* Stdcall attribute says callee is responsible for popping arguments
34348 if they are not variable. */
34349 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34350 true },
34351 /* Fastcall attribute says callee is responsible for popping arguments
34352 if they are not variable. */
34353 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34354 true },
34355 /* Thiscall attribute says callee is responsible for popping arguments
34356 if they are not variable. */
34357 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34358 true },
34359 /* Cdecl attribute says the callee is a normal C declaration */
34360 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34361 true },
34362 /* Regparm attribute specifies how many integer arguments are to be
34363 passed in registers. */
34364 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
34365 true },
34366 /* Sseregparm attribute says we are using x86_64 calling conventions
34367 for FP arguments. */
34368 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34369 true },
34370 /* force_align_arg_pointer says this function realigns the stack at entry. */
34371 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
34372 false, true, true, ix86_handle_cconv_attribute, false },
34373 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34374 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
34375 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
34376 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
34377 false },
34378 #endif
34379 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
34380 false },
34381 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
34382 false },
34383 #ifdef SUBTARGET_ATTRIBUTE_TABLE
34384 SUBTARGET_ATTRIBUTE_TABLE,
34385 #endif
34386 /* ms_abi and sysv_abi calling convention function attributes. */
34387 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
34388 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
34389 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
34390 false },
34391 { "callee_pop_aggregate_return", 1, 1, false, true, true,
34392 ix86_handle_callee_pop_aggregate_return, true },
34393 /* End element. */
34394 { NULL, 0, 0, false, false, false, NULL, false }
34395 };
34396
34397 /* Implement targetm.vectorize.builtin_vectorization_cost. */
34398 static int
34399 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
34400 tree vectype ATTRIBUTE_UNUSED,
34401 int misalign ATTRIBUTE_UNUSED)
34402 {
34403 switch (type_of_cost)
34404 {
34405 case scalar_stmt:
34406 return ix86_cost->scalar_stmt_cost;
34407
34408 case scalar_load:
34409 return ix86_cost->scalar_load_cost;
34410
34411 case scalar_store:
34412 return ix86_cost->scalar_store_cost;
34413
34414 case vector_stmt:
34415 return ix86_cost->vec_stmt_cost;
34416
34417 case vector_load:
34418 return ix86_cost->vec_align_load_cost;
34419
34420 case vector_store:
34421 return ix86_cost->vec_store_cost;
34422
34423 case vec_to_scalar:
34424 return ix86_cost->vec_to_scalar_cost;
34425
34426 case scalar_to_vec:
34427 return ix86_cost->scalar_to_vec_cost;
34428
34429 case unaligned_load:
34430 case unaligned_store:
34431 return ix86_cost->vec_unalign_load_cost;
34432
34433 case cond_branch_taken:
34434 return ix86_cost->cond_taken_branch_cost;
34435
34436 case cond_branch_not_taken:
34437 return ix86_cost->cond_not_taken_branch_cost;
34438
34439 case vec_perm:
34440 return 1;
34441
34442 default:
34443 gcc_unreachable ();
34444 }
34445 }
34446
34447
34448 /* Implement targetm.vectorize.builtin_vec_perm. */
34449
34450 static tree
34451 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
34452 {
34453 tree itype = TREE_TYPE (vec_type);
34454 bool u = TYPE_UNSIGNED (itype);
34455 enum machine_mode vmode = TYPE_MODE (vec_type);
34456 enum ix86_builtins fcode;
34457 bool ok = TARGET_SSE2;
34458
34459 switch (vmode)
34460 {
34461 case V4DFmode:
34462 ok = TARGET_AVX;
34463 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
34464 goto get_di;
34465 case V2DFmode:
34466 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
34467 get_di:
34468 itype = ix86_get_builtin_type (IX86_BT_DI);
34469 break;
34470
34471 case V8SFmode:
34472 ok = TARGET_AVX;
34473 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
34474 goto get_si;
34475 case V4SFmode:
34476 ok = TARGET_SSE;
34477 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
34478 get_si:
34479 itype = ix86_get_builtin_type (IX86_BT_SI);
34480 break;
34481
34482 case V2DImode:
34483 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
34484 break;
34485 case V4SImode:
34486 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
34487 break;
34488 case V8HImode:
34489 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
34490 break;
34491 case V16QImode:
34492 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
34493 break;
34494 default:
34495 ok = false;
34496 break;
34497 }
34498
34499 if (!ok)
34500 return NULL_TREE;
34501
34502 *mask_type = itype;
34503 return ix86_builtins[(int) fcode];
34504 }
34505
34506 /* Return a vector mode with twice as many elements as VMODE. */
34507 /* ??? Consider moving this to a table generated by genmodes.c. */
34508
34509 static enum machine_mode
34510 doublesize_vector_mode (enum machine_mode vmode)
34511 {
34512 switch (vmode)
34513 {
34514 case V2SFmode: return V4SFmode;
34515 case V1DImode: return V2DImode;
34516 case V2SImode: return V4SImode;
34517 case V4HImode: return V8HImode;
34518 case V8QImode: return V16QImode;
34519
34520 case V2DFmode: return V4DFmode;
34521 case V4SFmode: return V8SFmode;
34522 case V2DImode: return V4DImode;
34523 case V4SImode: return V8SImode;
34524 case V8HImode: return V16HImode;
34525 case V16QImode: return V32QImode;
34526
34527 case V4DFmode: return V8DFmode;
34528 case V8SFmode: return V16SFmode;
34529 case V4DImode: return V8DImode;
34530 case V8SImode: return V16SImode;
34531 case V16HImode: return V32HImode;
34532 case V32QImode: return V64QImode;
34533
34534 default:
34535 gcc_unreachable ();
34536 }
34537 }
34538
34539 /* Construct (set target (vec_select op0 (parallel perm))) and
34540 return true if that's a valid instruction in the active ISA. */
34541
34542 static bool
34543 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
34544 {
34545 rtx rperm[MAX_VECT_LEN], x;
34546 unsigned i;
34547
34548 for (i = 0; i < nelt; ++i)
34549 rperm[i] = GEN_INT (perm[i]);
34550
34551 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
34552 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
34553 x = gen_rtx_SET (VOIDmode, target, x);
34554
34555 x = emit_insn (x);
34556 if (recog_memoized (x) < 0)
34557 {
34558 remove_insn (x);
34559 return false;
34560 }
34561 return true;
34562 }
34563
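/* For example (an illustrative sketch of the RTL form), a V4SF reversal
   with PERM = {3, 2, 1, 0} emits

     (set (reg:V4SF target)
          (vec_select:V4SF (reg:V4SF op0)
                           (parallel [(const_int 3) (const_int 2)
                                      (const_int 1) (const_int 0)])))

   which is kept only if recog_memoized finds a matching pattern in the
   machine description.  */
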
34564 /* Similar, but generate a vec_concat from op0 and op1 as well. */
34565
34566 static bool
34567 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
34568 const unsigned char *perm, unsigned nelt)
34569 {
34570 enum machine_mode v2mode;
34571 rtx x;
34572
34573 v2mode = doublesize_vector_mode (GET_MODE (op0));
34574 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
34575 return expand_vselect (target, x, perm, nelt);
34576 }
34577
34578 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34579 in terms of blendp[sd] / pblendw / pblendvb. */
34580
34581 static bool
34582 expand_vec_perm_blend (struct expand_vec_perm_d *d)
34583 {
34584 enum machine_mode vmode = d->vmode;
34585 unsigned i, mask, nelt = d->nelt;
34586 rtx target, op0, op1, x;
34587
34588 if (!TARGET_SSE4_1 || d->op0 == d->op1)
34589 return false;
34590 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
34591 return false;
34592
34593 /* This is a blend, not a permute. Elements must stay in their
34594 respective lanes. */
34595 for (i = 0; i < nelt; ++i)
34596 {
34597 unsigned e = d->perm[i];
34598 if (!(e == i || e == i + nelt))
34599 return false;
34600 }
34601
34602 if (d->testing_p)
34603 return true;
34604
34605 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
34606 decision should be extracted elsewhere, so that we only try that
34607 sequence once all budget==3 options have been tried. */
34608
34609 /* For bytes, see if bytes move in pairs so we can use pblendw with
34610 an immediate argument, rather than pblendvb with a vector argument. */
34611 if (vmode == V16QImode)
34612 {
34613 bool pblendw_ok = true;
34614 for (i = 0; i < 16 && pblendw_ok; i += 2)
34615 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
34616
34617 if (!pblendw_ok)
34618 {
34619 rtx rperm[16], vperm;
34620
34621 for (i = 0; i < nelt; ++i)
34622 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
34623
34624 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
34625 vperm = force_reg (V16QImode, vperm);
34626
34627 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
34628 return true;
34629 }
34630 }
34631
34632 target = d->target;
34633 op0 = d->op0;
34634 op1 = d->op1;
34635 mask = 0;
34636
34637 switch (vmode)
34638 {
34639 case V4DFmode:
34640 case V8SFmode:
34641 case V2DFmode:
34642 case V4SFmode:
34643 case V8HImode:
34644 for (i = 0; i < nelt; ++i)
34645 mask |= (d->perm[i] >= nelt) << i;
34646 break;
34647
34648 case V2DImode:
34649 for (i = 0; i < 2; ++i)
34650 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
34651 goto do_subreg;
34652
34653 case V4SImode:
34654 for (i = 0; i < 4; ++i)
34655 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
34656 goto do_subreg;
34657
34658 case V16QImode:
34659 for (i = 0; i < 8; ++i)
34660 mask |= (d->perm[i * 2] >= 16) << i;
34661
34662 do_subreg:
34663 vmode = V8HImode;
34664 target = gen_lowpart (vmode, target);
34665 op0 = gen_lowpart (vmode, op0);
34666 op1 = gen_lowpart (vmode, op1);
34667 break;
34668
34669 default:
34670 gcc_unreachable ();
34671 }
34672
34673 /* This matches five different patterns with the different modes. */
34674 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
34675 x = gen_rtx_SET (VOIDmode, target, x);
34676 emit_insn (x);
34677
34678 return true;
34679 }
34680
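/* Worked example (illustrative): for V4SImode with PERM = {0, 5, 2, 7},
   elements 1 and 3 come from op1, so the V4SImode case above builds
   mask = (3 << 2) | (3 << 6) = 0xcc; after the do_subreg lowering to
   V8HImode this becomes the pblendw immediate that copies word pairs
   {2,3} and {6,7} (i.e. dwords 1 and 3) from op1.  */
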
34681 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34682 in terms of the variable form of vpermilps.
34683
34684 Note that we will have already failed the immediate input vpermilps,
34685 which requires that the high and low part shuffle be identical; the
34686 variable form doesn't require that. */
34687
34688 static bool
34689 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
34690 {
34691 rtx rperm[8], vperm;
34692 unsigned i;
34693
34694 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
34695 return false;
34696
34697 /* We can only permute within the 128-bit lane. */
34698 for (i = 0; i < 8; ++i)
34699 {
34700 unsigned e = d->perm[i];
34701 if (i < 4 ? e >= 4 : e < 4)
34702 return false;
34703 }
34704
34705 if (d->testing_p)
34706 return true;
34707
34708 for (i = 0; i < 8; ++i)
34709 {
34710 unsigned e = d->perm[i];
34711
34712 /* Within each 128-bit lane, the elements of op0 are numbered
34713 from 0 and the elements of op1 are numbered from 4. */
34714 if (e >= 8 + 4)
34715 e -= 8;
34716 else if (e >= 4)
34717 e -= 4;
34718
34719 rperm[i] = GEN_INT (e);
34720 }
34721
34722 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
34723 vperm = force_reg (V8SImode, vperm);
34724 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
34725
34726 return true;
34727 }
34728
34729 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34730 in terms of pshufb or vpperm. */
34731
34732 static bool
34733 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
34734 {
34735 unsigned i, nelt, eltsz;
34736 rtx rperm[16], vperm, target, op0, op1;
34737
34738 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
34739 return false;
34740 if (GET_MODE_SIZE (d->vmode) != 16)
34741 return false;
34742
34743 if (d->testing_p)
34744 return true;
34745
34746 nelt = d->nelt;
34747 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
34748
34749 for (i = 0; i < nelt; ++i)
34750 {
34751 unsigned j, e = d->perm[i];
34752 for (j = 0; j < eltsz; ++j)
34753 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
34754 }
34755
34756 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
34757 vperm = force_reg (V16QImode, vperm);
34758
34759 target = gen_lowpart (V16QImode, d->target);
34760 op0 = gen_lowpart (V16QImode, d->op0);
34761 if (d->op0 == d->op1)
34762 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
34763 else
34764 {
34765 op1 = gen_lowpart (V16QImode, d->op1);
34766 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
34767 }
34768
34769 return true;
34770 }
34771
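/* Worked example (illustrative): for V4SImode with PERM = {1, 0, 3, 2},
   eltsz is 4, so the loop above builds the byte selector

     { 4,5,6,7,  0,1,2,3,  12,13,14,15,  8,9,10,11 }

   i.e. each element index e expands into the eltsz consecutive byte
   indices e*eltsz .. e*eltsz + eltsz-1 consumed by pshufb (or by vpperm
   when the two operands differ).  */
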
34772 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
34773 in a single instruction. */
34774
34775 static bool
34776 expand_vec_perm_1 (struct expand_vec_perm_d *d)
34777 {
34778 unsigned i, nelt = d->nelt;
34779 unsigned char perm2[MAX_VECT_LEN];
34780
34781 /* Check plain VEC_SELECT first, because AVX has instructions that could
34782 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
34783 input where SEL+CONCAT may not. */
34784 if (d->op0 == d->op1)
34785 {
34786 int mask = nelt - 1;
34787
34788 for (i = 0; i < nelt; i++)
34789 perm2[i] = d->perm[i] & mask;
34790
34791 if (expand_vselect (d->target, d->op0, perm2, nelt))
34792 return true;
34793
34794 /* There are plenty of patterns in sse.md that are written for
34795 SEL+CONCAT and are not replicated for a single op. Perhaps
34796 that should be changed, to avoid the nastiness here. */
34797
34798 /* Recognize interleave style patterns, which means incrementing
34799 every other permutation operand. */
34800 for (i = 0; i < nelt; i += 2)
34801 {
34802 perm2[i] = d->perm[i] & mask;
34803 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
34804 }
34805 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
34806 return true;
34807
34808 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
34809 if (nelt >= 4)
34810 {
34811 for (i = 0; i < nelt; i += 4)
34812 {
34813 perm2[i + 0] = d->perm[i + 0] & mask;
34814 perm2[i + 1] = d->perm[i + 1] & mask;
34815 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
34816 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
34817 }
34818
34819 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
34820 return true;
34821 }
34822 }
34823
34824 /* Finally, try the fully general two operand permute. */
34825 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
34826 return true;
34827
34828 /* Recognize interleave style patterns with reversed operands. */
34829 if (d->op0 != d->op1)
34830 {
34831 for (i = 0; i < nelt; ++i)
34832 {
34833 unsigned e = d->perm[i];
34834 if (e >= nelt)
34835 e -= nelt;
34836 else
34837 e += nelt;
34838 perm2[i] = e;
34839 }
34840
34841 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
34842 return true;
34843 }
34844
34845 /* Try the SSE4.1 blend variable merge instructions. */
34846 if (expand_vec_perm_blend (d))
34847 return true;
34848
34849 /* Try one of the AVX vpermil variable permutations. */
34850 if (expand_vec_perm_vpermil (d))
34851 return true;
34852
34853 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
34854 if (expand_vec_perm_pshufb (d))
34855 return true;
34856
34857 return false;
34858 }
34859
34860 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34861 in terms of a pair of pshuflw + pshufhw instructions. */
34862
34863 static bool
34864 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
34865 {
34866 unsigned char perm2[MAX_VECT_LEN];
34867 unsigned i;
34868 bool ok;
34869
34870 if (d->vmode != V8HImode || d->op0 != d->op1)
34871 return false;
34872
34873 /* The two permutations only operate in 64-bit lanes. */
34874 for (i = 0; i < 4; ++i)
34875 if (d->perm[i] >= 4)
34876 return false;
34877 for (i = 4; i < 8; ++i)
34878 if (d->perm[i] < 4)
34879 return false;
34880
34881 if (d->testing_p)
34882 return true;
34883
34884 /* Emit the pshuflw. */
34885 memcpy (perm2, d->perm, 4);
34886 for (i = 4; i < 8; ++i)
34887 perm2[i] = i;
34888 ok = expand_vselect (d->target, d->op0, perm2, 8);
34889 gcc_assert (ok);
34890
34891 /* Emit the pshufhw. */
34892 memcpy (perm2 + 4, d->perm + 4, 4);
34893 for (i = 0; i < 4; ++i)
34894 perm2[i] = i;
34895 ok = expand_vselect (d->target, d->target, perm2, 8);
34896 gcc_assert (ok);
34897
34898 return true;
34899 }
34900
34901 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
34902 the permutation using the SSSE3 palignr instruction. This succeeds
34903 when all of the elements in PERM fit within one vector and we merely
34904 need to shift them down so that a single vector permutation has a
34905 chance to succeed. */
34906
34907 static bool
34908 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
34909 {
34910 unsigned i, nelt = d->nelt;
34911 unsigned min, max;
34912 bool in_order, ok;
34913 rtx shift;
34914
34915 /* Even with AVX, palignr only operates on 128-bit vectors. */
34916 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
34917 return false;
34918
34919 min = nelt, max = 0;
34920 for (i = 0; i < nelt; ++i)
34921 {
34922 unsigned e = d->perm[i];
34923 if (e < min)
34924 min = e;
34925 if (e > max)
34926 max = e;
34927 }
34928 if (min == 0 || max - min >= nelt)
34929 return false;
34930
34931 /* Given that we have SSSE3, we know we'll be able to implement the
34932 single operand permutation after the palignr with pshufb. */
34933 if (d->testing_p)
34934 return true;
34935
34936 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
34937 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
34938 gen_lowpart (TImode, d->op1),
34939 gen_lowpart (TImode, d->op0), shift));
34940
34941 d->op0 = d->op1 = d->target;
34942
34943 in_order = true;
34944 for (i = 0; i < nelt; ++i)
34945 {
34946 unsigned e = d->perm[i] - min;
34947 if (e != i)
34948 in_order = false;
34949 d->perm[i] = e;
34950 }
34951
34952 /* Test for the degenerate case where the alignment by itself
34953 produces the desired permutation. */
34954 if (in_order)
34955 return true;
34956
34957 ok = expand_vec_perm_1 (d);
34958 gcc_assert (ok);
34959
34960 return ok;
34961 }
34962
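/* Worked example (illustrative): for V8HImode with PERM = {3, 4, 5, 6,
   7, 8, 9, 10}, min is 3, so the palignr above shifts the concatenation
   op1:op0 right by 3 elements (6 bytes); the residual permutation then
   becomes {0, 1, ..., 7}, the in_order case, and no further single
   operand shuffle is needed.  */
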
34963 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
34964 a two vector permutation into a single vector permutation by using
34965 an interleave operation to merge the vectors. */
34966
34967 static bool
34968 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
34969 {
34970 struct expand_vec_perm_d dremap, dfinal;
34971 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
34972 unsigned contents, h1, h2, h3, h4;
34973 unsigned char remap[2 * MAX_VECT_LEN];
34974 rtx seq;
34975 bool ok;
34976
34977 if (d->op0 == d->op1)
34978 return false;
34979
34980 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
34981 lanes. We can use similar techniques with the vperm2f128 instruction,
34982 but it requires slightly different logic. */
34983 if (GET_MODE_SIZE (d->vmode) != 16)
34984 return false;
34985
34986 /* Examine from whence the elements come. */
34987 contents = 0;
34988 for (i = 0; i < nelt; ++i)
34989 contents |= 1u << d->perm[i];
34990
34991 /* Split the two input vectors into 4 halves. */
34992 h1 = (1u << nelt2) - 1;
34993 h2 = h1 << nelt2;
34994 h3 = h2 << nelt2;
34995 h4 = h3 << nelt2;
34996
34997 memset (remap, 0xff, sizeof (remap));
34998 dremap = *d;
34999
35000 /* If the elements all come from the two low halves, use interleave low;
35001 similarly for interleave high. If the elements come from mismatched
35002 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
35003 if ((contents & (h1 | h3)) == contents)
35004 {
35005 for (i = 0; i < nelt2; ++i)
35006 {
35007 remap[i] = i * 2;
35008 remap[i + nelt] = i * 2 + 1;
35009 dremap.perm[i * 2] = i;
35010 dremap.perm[i * 2 + 1] = i + nelt;
35011 }
35012 }
35013 else if ((contents & (h2 | h4)) == contents)
35014 {
35015 for (i = 0; i < nelt2; ++i)
35016 {
35017 remap[i + nelt2] = i * 2;
35018 remap[i + nelt + nelt2] = i * 2 + 1;
35019 dremap.perm[i * 2] = i + nelt2;
35020 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
35021 }
35022 }
35023 else if ((contents & (h1 | h4)) == contents)
35024 {
35025 for (i = 0; i < nelt2; ++i)
35026 {
35027 remap[i] = i;
35028 remap[i + nelt + nelt2] = i + nelt2;
35029 dremap.perm[i] = i;
35030 dremap.perm[i + nelt2] = i + nelt + nelt2;
35031 }
35032 if (nelt != 4)
35033 {
35034 dremap.vmode = V2DImode;
35035 dremap.nelt = 2;
35036 dremap.perm[0] = 0;
35037 dremap.perm[1] = 3;
35038 }
35039 }
35040 else if ((contents & (h2 | h3)) == contents)
35041 {
35042 for (i = 0; i < nelt2; ++i)
35043 {
35044 remap[i + nelt2] = i;
35045 remap[i + nelt] = i + nelt2;
35046 dremap.perm[i] = i + nelt2;
35047 dremap.perm[i + nelt2] = i + nelt;
35048 }
35049 if (nelt != 4)
35050 {
35051 dremap.vmode = V2DImode;
35052 dremap.nelt = 2;
35053 dremap.perm[0] = 1;
35054 dremap.perm[1] = 2;
35055 }
35056 }
35057 else
35058 return false;
35059
35060 /* Use the remapping array set up above to move the elements from their
35061 swizzled locations into their final destinations. */
35062 dfinal = *d;
35063 for (i = 0; i < nelt; ++i)
35064 {
35065 unsigned e = remap[d->perm[i]];
35066 gcc_assert (e < nelt);
35067 dfinal.perm[i] = e;
35068 }
35069 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
35070 dfinal.op1 = dfinal.op0;
35071 dremap.target = dfinal.op0;
35072
35073 /* Test if the final remap can be done with a single insn. For V4SFmode or
35074 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
35075 start_sequence ();
35076 ok = expand_vec_perm_1 (&dfinal);
35077 seq = get_insns ();
35078 end_sequence ();
35079
35080 if (!ok)
35081 return false;
35082
35083 if (dremap.vmode != dfinal.vmode)
35084 {
35085 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
35086 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
35087 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
35088 }
35089
35090 ok = expand_vec_perm_1 (&dremap);
35091 gcc_assert (ok);
35092
35093 emit_insn (seq);
35094 return true;
35095 }
35096
35097 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
35098 permutation with two pshufb insns and an ior. We should have already
35099 failed all two instruction sequences. */
35100
35101 static bool
35102 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
35103 {
35104 rtx rperm[2][16], vperm, l, h, op, m128;
35105 unsigned int i, nelt, eltsz;
35106
35107 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
35108 return false;
35109 gcc_assert (d->op0 != d->op1);
35110
35111 nelt = d->nelt;
35112 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
35113
35114 /* Generate two permutation masks. If the required element is within
35115 the given vector it is shuffled into the proper lane. If the required
35116 element is in the other vector, force a zero into the lane by setting
35117 bit 7 in the permutation mask. */
35118 m128 = GEN_INT (-128);
35119 for (i = 0; i < nelt; ++i)
35120 {
35121 unsigned j, e = d->perm[i];
35122 unsigned which = (e >= nelt);
35123 if (e >= nelt)
35124 e -= nelt;
35125
35126 for (j = 0; j < eltsz; ++j)
35127 {
35128 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
35129 rperm[1-which][i*eltsz + j] = m128;
35130 }
35131 }
35132
35133 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
35134 vperm = force_reg (V16QImode, vperm);
35135
35136 l = gen_reg_rtx (V16QImode);
35137 op = gen_lowpart (V16QImode, d->op0);
35138 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
35139
35140 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
35141 vperm = force_reg (V16QImode, vperm);
35142
35143 h = gen_reg_rtx (V16QImode);
35144 op = gen_lowpart (V16QImode, d->op1);
35145 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
35146
35147 op = gen_lowpart (V16QImode, d->target);
35148 emit_insn (gen_iorv16qi3 (op, l, h));
35149
35150 return true;
35151 }
35152
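/* Worked example (illustrative): for the V8HImode extract-even case,
   PERM = {0, 2, 4, 6, 8, 10, 12, 14} and eltsz is 2, so the op0 mask
   built above is { 0,1, 4,5, 8,9, 12,13, zz,zz, ... } and the op1 mask
   is { zz,zz, ..., 0,1, 4,5, 8,9, 12,13 }, where zz = -128 makes pshufb
   write a zero byte; the final por then merges the two half results.  */
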
35153 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
35154 and extract-odd permutations. */
35155
35156 static bool
35157 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
35158 {
35159 rtx t1, t2, t3;
35160
35161 switch (d->vmode)
35162 {
35163 case V4DFmode:
35164 t1 = gen_reg_rtx (V4DFmode);
35165 t2 = gen_reg_rtx (V4DFmode);
35166
35167 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
35168 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
35169 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
35170
35171 /* Now an unpck[lh]pd will produce the result required. */
35172 if (odd)
35173 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
35174 else
35175 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
35176 emit_insn (t3);
35177 break;
35178
35179 case V8SFmode:
35180 {
35181 int mask = odd ? 0xdd : 0x88;
35182
35183 t1 = gen_reg_rtx (V8SFmode);
35184 t2 = gen_reg_rtx (V8SFmode);
35185 t3 = gen_reg_rtx (V8SFmode);
35186
35187 /* Shuffle within the 128-bit lanes to produce:
35188 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
35189 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
35190 GEN_INT (mask)));
35191
35192 /* Shuffle the lanes around to produce:
35193 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
35194 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
35195 GEN_INT (0x3)));
35196
35197 /* Shuffle within the 128-bit lanes to produce:
35198 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
35199 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
35200
35201 /* Shuffle within the 128-bit lanes to produce:
35202 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
35203 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
35204
35205 /* Shuffle the lanes around to produce:
35206 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
35207 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
35208 GEN_INT (0x20)));
35209 }
35210 break;
35211
35212 case V2DFmode:
35213 case V4SFmode:
35214 case V2DImode:
35215 case V4SImode:
35216 /* These are always directly implementable by expand_vec_perm_1. */
35217 gcc_unreachable ();
35218
35219 case V8HImode:
35220 if (TARGET_SSSE3)
35221 return expand_vec_perm_pshufb2 (d);
35222 else
35223 {
35224 /* We need 2*log2(N)-1 operations to achieve odd/even
35225 with interleave. */
35226 t1 = gen_reg_rtx (V8HImode);
35227 t2 = gen_reg_rtx (V8HImode);
35228 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
35229 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
35230 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
35231 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
35232 if (odd)
35233 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
35234 else
35235 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
35236 emit_insn (t3);
35237 }
35238 break;
35239
35240 case V16QImode:
35241 if (TARGET_SSSE3)
35242 return expand_vec_perm_pshufb2 (d);
35243 else
35244 {
35245 t1 = gen_reg_rtx (V16QImode);
35246 t2 = gen_reg_rtx (V16QImode);
35247 t3 = gen_reg_rtx (V16QImode);
35248 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
35249 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
35250 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
35251 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
35252 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
35253 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
35254 if (odd)
35255 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
35256 else
35257 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
35258 emit_insn (t3);
35259 }
35260 break;
35261
35262 default:
35263 gcc_unreachable ();
35264 }
35265
35266 return true;
35267 }
35268
35269 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
35270 extract-even and extract-odd permutations. */
35271
35272 static bool
35273 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
35274 {
35275 unsigned i, odd, nelt = d->nelt;
35276
35277 odd = d->perm[0];
35278 if (odd != 0 && odd != 1)
35279 return false;
35280
35281 for (i = 1; i < nelt; ++i)
35282 if (d->perm[i] != 2 * i + odd)
35283 return false;
35284
35285 return expand_vec_perm_even_odd_1 (d, odd);
35286 }
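
/* Example (illustrative): for nelt == 4, perm == { 0, 2, 4, 6 } is matched
   as extract-even (odd == 0) and perm == { 1, 3, 5, 7 } as extract-odd
   (odd == 1); any other ordering is rejected here and left to the other
   strategies.  */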
35287
35288 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
35289 permutations. We assume that expand_vec_perm_1 has already failed. */
35290
35291 static bool
35292 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
35293 {
35294 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
35295 enum machine_mode vmode = d->vmode;
35296 unsigned char perm2[4];
35297 rtx op0 = d->op0;
35298 bool ok;
35299
35300 switch (vmode)
35301 {
35302 case V4DFmode:
35303 case V8SFmode:
35304 /* These are special-cased in sse.md so that we can optionally
35305 use the vbroadcast instruction. They expand to two insns
35306 if the input happens to be in a register. */
35307 gcc_unreachable ();
35308
35309 case V2DFmode:
35310 case V2DImode:
35311 case V4SFmode:
35312 case V4SImode:
35313 /* These are always implementable using standard shuffle patterns. */
35314 gcc_unreachable ();
35315
35316 case V8HImode:
35317 case V16QImode:
35318 /* These can be implemented via interleave. We save one insn by
35319 stopping once we have promoted to V4SImode and then use pshufd. */
35320 do
35321 {
35322 optab otab = vec_interleave_low_optab;
35323
35324 if (elt >= nelt2)
35325 {
35326 otab = vec_interleave_high_optab;
35327 elt -= nelt2;
35328 }
35329 nelt2 /= 2;
35330
35331 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
35332 vmode = get_mode_wider_vector (vmode);
35333 op0 = gen_lowpart (vmode, op0);
35334 }
35335 while (vmode != V4SImode);
35336
35337 memset (perm2, elt, 4);
35338 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
35339 gcc_assert (ok);
35340 return true;
35341
35342 default:
35343 gcc_unreachable ();
35344 }
35345 }
35346
35347 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
35348 broadcast permutations. */
35349
35350 static bool
35351 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
35352 {
35353 unsigned i, elt, nelt = d->nelt;
35354
35355 if (d->op0 != d->op1)
35356 return false;
35357
35358 elt = d->perm[0];
35359 for (i = 1; i < nelt; ++i)
35360 if (d->perm[i] != elt)
35361 return false;
35362
35363 return expand_vec_perm_broadcast_1 (d);
35364 }
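
/* Example (illustrative): with d->op0 == d->op1 and nelt == 8, the mask
   { 3, 3, 3, 3, 3, 3, 3, 3 } is recognized as a broadcast of element 3 and
   handed to expand_vec_perm_broadcast_1.  */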
35365
35366 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
35367 With all of the interface bits taken care of, perform the expansion
35368 in D and return true on success. */
35369
35370 static bool
35371 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
35372 {
35373 /* Try a single instruction expansion. */
35374 if (expand_vec_perm_1 (d))
35375 return true;
35376
35377 /* Try sequences of two instructions. */
35378
35379 if (expand_vec_perm_pshuflw_pshufhw (d))
35380 return true;
35381
35382 if (expand_vec_perm_palignr (d))
35383 return true;
35384
35385 if (expand_vec_perm_interleave2 (d))
35386 return true;
35387
35388 if (expand_vec_perm_broadcast (d))
35389 return true;
35390
35391 /* Try sequences of three instructions. */
35392
35393 if (expand_vec_perm_pshufb2 (d))
35394 return true;
35395
35396 /* ??? Look for narrow permutations whose element orderings would
35397 allow the promotion to a wider mode. */
35398
35399 /* ??? Look for sequences of interleave or a wider permute that place
35400 the data into the correct lanes for a half-vector shuffle like
35401 pshuf[lh]w or vpermilps. */
35402
35403 /* ??? Look for sequences of interleave that produce the desired results.
35404 The combinatorics of punpck[lh] get pretty ugly... */
35405
35406 if (expand_vec_perm_even_odd (d))
35407 return true;
35408
35409 return false;
35410 }
35411
35412 /* Extract the values from the vector CST into the permutation array in D.
35413 Return 0 on error, 1 if all values from the permutation come from the
35414 first vector, 2 if all values from the second vector, and 3 otherwise. */
35415
35416 static int
35417 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
35418 {
35419 tree list = TREE_VECTOR_CST_ELTS (cst);
35420 unsigned i, nelt = d->nelt;
35421 int ret = 0;
35422
35423 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
35424 {
35425 unsigned HOST_WIDE_INT e;
35426
35427 if (!host_integerp (TREE_VALUE (list), 1))
35428 return 0;
35429 e = tree_low_cst (TREE_VALUE (list), 1);
35430 if (e >= 2 * nelt)
35431 return 0;
35432
35433 ret |= (e < nelt ? 1 : 2);
35434 d->perm[i] = e;
35435 }
35436 gcc_assert (list == NULL);
35437
35438 /* For all elements from second vector, fold the elements to first. */
35439 if (ret == 2)
35440 for (i = 0; i < nelt; ++i)
35441 d->perm[i] -= nelt;
35442
35443 return ret;
35444 }
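
/* Example (illustrative): for nelt == 4, the constant { 1, 3, 0, 2 }
   references only the first vector and returns 1; { 5, 7, 4, 6 } references
   only the second vector, returns 2, and d->perm is folded down to
   { 1, 3, 0, 2 }; { 0, 4, 1, 5 } references both and returns 3.  */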
35445
35446 static rtx
35447 ix86_expand_vec_perm_builtin (tree exp)
35448 {
35449 struct expand_vec_perm_d d;
35450 tree arg0, arg1, arg2;
35451
35452 arg0 = CALL_EXPR_ARG (exp, 0);
35453 arg1 = CALL_EXPR_ARG (exp, 1);
35454 arg2 = CALL_EXPR_ARG (exp, 2);
35455
35456 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
35457 d.nelt = GET_MODE_NUNITS (d.vmode);
35458 d.testing_p = false;
35459 gcc_assert (VECTOR_MODE_P (d.vmode));
35460
35461 if (TREE_CODE (arg2) != VECTOR_CST)
35462 {
35463 error_at (EXPR_LOCATION (exp),
35464 "vector permutation requires vector constant");
35465 goto exit_error;
35466 }
35467
35468 switch (extract_vec_perm_cst (&d, arg2))
35469 {
35470 default:
35471 gcc_unreachable();
35472
35473 case 0:
35474 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
35475 goto exit_error;
35476
35477 case 3:
35478 if (!operand_equal_p (arg0, arg1, 0))
35479 {
35480 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
35481 d.op0 = force_reg (d.vmode, d.op0);
35482 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
35483 d.op1 = force_reg (d.vmode, d.op1);
35484 break;
35485 }
35486
35487 /* The elements of PERM do not suggest that only the first operand
35488 is used, but both operands are identical. Allow easier matching
35489 of the permutation by folding the permutation into the single
35490 input vector. */
35491 {
35492 unsigned i, nelt = d.nelt;
35493 for (i = 0; i < nelt; ++i)
35494 if (d.perm[i] >= nelt)
35495 d.perm[i] -= nelt;
35496 }
35497 /* FALLTHRU */
35498
35499 case 1:
35500 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
35501 d.op0 = force_reg (d.vmode, d.op0);
35502 d.op1 = d.op0;
35503 break;
35504
35505 case 2:
35506 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
35507 d.op0 = force_reg (d.vmode, d.op0);
35508 d.op1 = d.op0;
35509 break;
35510 }
35511
35512 d.target = gen_reg_rtx (d.vmode);
35513 if (ix86_expand_vec_perm_builtin_1 (&d))
35514 return d.target;
35515
35516 /* For compiler-generated permutations, we should never get here, because
35517 the compiler should also be checking the ok hook. But since this is a
35518 builtin the user has access to, don't abort. */
35519 switch (d.nelt)
35520 {
35521 case 2:
35522 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
35523 break;
35524 case 4:
35525 sorry ("vector permutation (%d %d %d %d)",
35526 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
35527 break;
35528 case 8:
35529 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
35530 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
35531 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
35532 break;
35533 case 16:
35534 sorry ("vector permutation "
35535 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
35536 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
35537 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
35538 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
35539 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
35540 break;
35541 default:
35542 gcc_unreachable ();
35543 }
35544 exit_error:
35545 return CONST0_RTX (d.vmode);
35546 }
35547
35548 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
35549
35550 static bool
35551 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
35552 {
35553 struct expand_vec_perm_d d;
35554 int vec_mask;
35555 bool ret, one_vec;
35556
35557 d.vmode = TYPE_MODE (vec_type);
35558 d.nelt = GET_MODE_NUNITS (d.vmode);
35559 d.testing_p = true;
35560
35561 /* Given sufficient ISA support we can just return true here
35562 for selected vector modes. */
35563 if (GET_MODE_SIZE (d.vmode) == 16)
35564 {
35565 /* All implementable with a single vpperm insn. */
35566 if (TARGET_XOP)
35567 return true;
35568 /* All implementable with 2 pshufb + 1 ior. */
35569 if (TARGET_SSSE3)
35570 return true;
35571 /* All implementable with shufpd or unpck[lh]pd. */
35572 if (d.nelt == 2)
35573 return true;
35574 }
35575
35576 vec_mask = extract_vec_perm_cst (&d, mask);
35577
35578 /* Check whether the mask can be applied to the vector type. */
35579 if (vec_mask < 0 || vec_mask > 3)
35580 return false;
35581
35582 one_vec = (vec_mask != 3);
35583
35584 /* Implementable with shufps or pshufd. */
35585 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
35586 return true;
35587
35588 /* Otherwise we have to go through the motions and see if we can
35589 figure out how to generate the requested permutation. */
35590 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
35591 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
35592 if (!one_vec)
35593 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
35594
35595 start_sequence ();
35596 ret = ix86_expand_vec_perm_builtin_1 (&d);
35597 end_sequence ();
35598
35599 return ret;
35600 }
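
/* Note (illustrative): the probe above runs the full expander inside a
   start_sequence ()/end_sequence () pair and discards the generated RTL, so
   asking whether a mask is implementable emits nothing into the instruction
   stream.  */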
35601
35602 void
35603 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
35604 {
35605 struct expand_vec_perm_d d;
35606 unsigned i, nelt;
35607
35608 d.target = targ;
35609 d.op0 = op0;
35610 d.op1 = op1;
35611 d.vmode = GET_MODE (targ);
35612 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
35613 d.testing_p = false;
35614
35615 for (i = 0; i < nelt; ++i)
35616 d.perm[i] = i * 2 + odd;
35617
35618 /* We'll either be able to implement the permutation directly... */
35619 if (expand_vec_perm_1 (&d))
35620 return;
35621
35622 /* ... or we use the special-case patterns. */
35623 expand_vec_perm_even_odd_1 (&d, odd);
35624 }
35625
35626 /* Expand an insert into a vector register through pinsr insn.
35627 Return true if successful. */
35628
35629 bool
35630 ix86_expand_pinsr (rtx *operands)
35631 {
35632 rtx dst = operands[0];
35633 rtx src = operands[3];
35634
35635 unsigned int size = INTVAL (operands[1]);
35636 unsigned int pos = INTVAL (operands[2]);
35637
35638 if (GET_CODE (dst) == SUBREG)
35639 {
35640 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
35641 dst = SUBREG_REG (dst);
35642 }
35643
35644 if (GET_CODE (src) == SUBREG)
35645 src = SUBREG_REG (src);
35646
35647 switch (GET_MODE (dst))
35648 {
35649 case V16QImode:
35650 case V8HImode:
35651 case V4SImode:
35652 case V2DImode:
35653 {
35654 enum machine_mode srcmode, dstmode;
35655 rtx (*pinsr)(rtx, rtx, rtx, rtx);
35656
35657 srcmode = mode_for_size (size, MODE_INT, 0);
35658
35659 switch (srcmode)
35660 {
35661 case QImode:
35662 if (!TARGET_SSE4_1)
35663 return false;
35664 dstmode = V16QImode;
35665 pinsr = gen_sse4_1_pinsrb;
35666 break;
35667
35668 case HImode:
35669 if (!TARGET_SSE2)
35670 return false;
35671 dstmode = V8HImode;
35672 pinsr = gen_sse2_pinsrw;
35673 break;
35674
35675 case SImode:
35676 if (!TARGET_SSE4_1)
35677 return false;
35678 dstmode = V4SImode;
35679 pinsr = gen_sse4_1_pinsrd;
35680 break;
35681
35682 case DImode:
35683 gcc_assert (TARGET_64BIT);
35684 if (!TARGET_SSE4_1)
35685 return false;
35686 dstmode = V2DImode;
35687 pinsr = gen_sse4_1_pinsrq;
35688 break;
35689
35690 default:
35691 return false;
35692 }
35693
35694 dst = gen_lowpart (dstmode, dst);
35695 src = gen_lowpart (srcmode, src);
35696
35697 pos /= size;
35698
35699 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
35700 return true;
35701 }
35702
35703 default:
35704 return false;
35705 }
35706 }
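
/* Worked example (illustrative): inserting a 16-bit value at bit position 48
   of a V2DI destination selects srcmode == HImode, so the vector is viewed
   as V8HI; pos /= size gives 48 / 16 == 3 and the insert pattern is emitted
   with GEN_INT (1 << 3) == 0x8 as its last operand.  */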
35707 \f
35708 /* This function returns the calling-ABI-specific va_list type node.
35709 It returns the FNDECL-specific va_list type. */
35710
35711 static tree
35712 ix86_fn_abi_va_list (tree fndecl)
35713 {
35714 if (!TARGET_64BIT)
35715 return va_list_type_node;
35716 gcc_assert (fndecl != NULL_TREE);
35717
35718 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
35719 return ms_va_list_type_node;
35720 else
35721 return sysv_va_list_type_node;
35722 }
35723
35724 /* Returns the canonical va_list type specified by TYPE. If there
35725 is no valid TYPE provided, it returns NULL_TREE. */
35726
35727 static tree
35728 ix86_canonical_va_list_type (tree type)
35729 {
35730 tree wtype, htype;
35731
35732 /* Resolve references and pointers to va_list type. */
35733 if (TREE_CODE (type) == MEM_REF)
35734 type = TREE_TYPE (type);
35735 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
35736 type = TREE_TYPE (type);
35737 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
35738 type = TREE_TYPE (type);
35739
35740 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
35741 {
35742 wtype = va_list_type_node;
35743 gcc_assert (wtype != NULL_TREE);
35744 htype = type;
35745 if (TREE_CODE (wtype) == ARRAY_TYPE)
35746 {
35747 /* If va_list is an array type, the argument may have decayed
35748 to a pointer type, e.g. by being passed to another function.
35749 In that case, unwrap both types so that we can compare the
35750 underlying records. */
35751 if (TREE_CODE (htype) == ARRAY_TYPE
35752 || POINTER_TYPE_P (htype))
35753 {
35754 wtype = TREE_TYPE (wtype);
35755 htype = TREE_TYPE (htype);
35756 }
35757 }
35758 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
35759 return va_list_type_node;
35760 wtype = sysv_va_list_type_node;
35761 gcc_assert (wtype != NULL_TREE);
35762 htype = type;
35763 if (TREE_CODE (wtype) == ARRAY_TYPE)
35764 {
35765 /* If va_list is an array type, the argument may have decayed
35766 to a pointer type, e.g. by being passed to another function.
35767 In that case, unwrap both types so that we can compare the
35768 underlying records. */
35769 if (TREE_CODE (htype) == ARRAY_TYPE
35770 || POINTER_TYPE_P (htype))
35771 {
35772 wtype = TREE_TYPE (wtype);
35773 htype = TREE_TYPE (htype);
35774 }
35775 }
35776 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
35777 return sysv_va_list_type_node;
35778 wtype = ms_va_list_type_node;
35779 gcc_assert (wtype != NULL_TREE);
35780 htype = type;
35781 if (TREE_CODE (wtype) == ARRAY_TYPE)
35782 {
35783 /* If va_list is an array type, the argument may have decayed
35784 to a pointer type, e.g. by being passed to another function.
35785 In that case, unwrap both types so that we can compare the
35786 underlying records. */
35787 if (TREE_CODE (htype) == ARRAY_TYPE
35788 || POINTER_TYPE_P (htype))
35789 {
35790 wtype = TREE_TYPE (wtype);
35791 htype = TREE_TYPE (htype);
35792 }
35793 }
35794 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
35795 return ms_va_list_type_node;
35796 return NULL_TREE;
35797 }
35798 return std_canonical_va_list_type (type);
35799 }
35800
35801 /* Iterate through the target-specific builtin types for va_list.
35802 IDX denotes the iterator, *PTREE is set to the result type of
35803 the va_list builtin, and *PNAME to its internal type.
35804 Returns zero if there is no element for this index, otherwise
35805 IDX should be increased upon the next call.
35806 Note, do not iterate a base builtin's name like __builtin_va_list.
35807 Used from c_common_nodes_and_builtins. */
35808
35809 static int
35810 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
35811 {
35812 if (TARGET_64BIT)
35813 {
35814 switch (idx)
35815 {
35816 default:
35817 break;
35818
35819 case 0:
35820 *ptree = ms_va_list_type_node;
35821 *pname = "__builtin_ms_va_list";
35822 return 1;
35823
35824 case 1:
35825 *ptree = sysv_va_list_type_node;
35826 *pname = "__builtin_sysv_va_list";
35827 return 1;
35828 }
35829 }
35830
35831 return 0;
35832 }
35833
35834 #undef TARGET_SCHED_DISPATCH
35835 #define TARGET_SCHED_DISPATCH has_dispatch
35836 #undef TARGET_SCHED_DISPATCH_DO
35837 #define TARGET_SCHED_DISPATCH_DO do_dispatch
35838 #undef TARGET_SCHED_REASSOCIATION_WIDTH
35839 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
35840
35841 /* The size of the dispatch window is the total number of bytes of
35842 object code allowed in a window. */
35843 #define DISPATCH_WINDOW_SIZE 16
35844
35845 /* Number of dispatch windows considered for scheduling. */
35846 #define MAX_DISPATCH_WINDOWS 3
35847
35848 /* Maximum number of instructions in a window. */
35849 #define MAX_INSN 4
35850
35851 /* Maximum number of immediate operands in a window. */
35852 #define MAX_IMM 4
35853
35854 /* Maximum number of immediate bits allowed in a window. */
35855 #define MAX_IMM_SIZE 128
35856
35857 /* Maximum number of 32 bit immediates allowed in a window. */
35858 #define MAX_IMM_32 4
35859
35860 /* Maximum number of 64 bit immediates allowed in a window. */
35861 #define MAX_IMM_64 2
35862
35863 /* Maximum total of loads or prefetches allowed in a window. */
35864 #define MAX_LOAD 2
35865
35866 /* Maximum total of stores allowed in a window. */
35867 #define MAX_STORE 1
35868
35869 #undef BIG
35870 #define BIG 100
35871
35872
35873 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
35874 enum dispatch_group {
35875 disp_no_group = 0,
35876 disp_load,
35877 disp_store,
35878 disp_load_store,
35879 disp_prefetch,
35880 disp_imm,
35881 disp_imm_32,
35882 disp_imm_64,
35883 disp_branch,
35884 disp_cmp,
35885 disp_jcc,
35886 disp_last
35887 };
35888
35889 /* Number of allowable groups in a dispatch window. It is an array
35890 indexed by dispatch_group enum. 100 is used as a big number,
35891 because the number of these kind of operations does not have any
35892 effect in dispatch window, but we need them for other reasons in
35893 the table. */
35894 static unsigned int num_allowable_groups[disp_last] = {
35895 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
35896 };
35897
35898 char group_name[disp_last + 1][16] = {
35899 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
35900 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
35901 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
35902 };
35903
35904 /* Instruction path. */
35905 enum insn_path {
35906 no_path = 0,
35907 path_single, /* Single micro op. */
35908 path_double, /* Double micro op. */
35909 path_multi, /* Instructions with more than 2 micro ops. */
35910 last_path
35911 };
35912
35913 /* sched_insn_info defines a window to the instructions scheduled in
35914 the basic block. It contains a pointer to the insn_info table and
35915 the instruction scheduled.
35916
35917 Windows are allocated for each basic block and are linked
35918 together. */
35919 typedef struct sched_insn_info_s {
35920 rtx insn;
35921 enum dispatch_group group;
35922 enum insn_path path;
35923 int byte_len;
35924 int imm_bytes;
35925 } sched_insn_info;
35926
35927 /* Linked list of dispatch windows. This is a two-way list of
35928 dispatch windows of a basic block. It contains information about
35929 the number of uops in the window and the total number of
35930 instructions and of bytes in the object code for this dispatch
35931 window. */
35932 typedef struct dispatch_windows_s {
35933 int num_insn; /* Number of insn in the window. */
35934 int num_uops; /* Number of uops in the window. */
35935 int window_size; /* Number of bytes in the window. */
35936 int window_num; /* Window number, 0 or 1. */
35937 int num_imm; /* Number of immediates in an insn. */
35938 int num_imm_32; /* Number of 32 bit immediates in an insn. */
35939 int num_imm_64; /* Number of 64 bit immediates in an insn. */
35940 int imm_size; /* Total immediates in the window. */
35941 int num_loads; /* Total memory loads in the window. */
35942 int num_stores; /* Total memory stores in the window. */
35943 int violation; /* Violation exists in window. */
35944 sched_insn_info *window; /* Pointer to the window. */
35945 struct dispatch_windows_s *next;
35946 struct dispatch_windows_s *prev;
35947 } dispatch_windows;
35948
35949 /* Immediate values used in an insn. */
35950 typedef struct imm_info_s
35951 {
35952 int imm;
35953 int imm32;
35954 int imm64;
35955 } imm_info;
35956
35957 static dispatch_windows *dispatch_window_list;
35958 static dispatch_windows *dispatch_window_list1;
35959
35960 /* Get dispatch group of insn. */
35961
35962 static enum dispatch_group
35963 get_mem_group (rtx insn)
35964 {
35965 enum attr_memory memory;
35966
35967 if (INSN_CODE (insn) < 0)
35968 return disp_no_group;
35969 memory = get_attr_memory (insn);
35970 if (memory == MEMORY_STORE)
35971 return disp_store;
35972
35973 if (memory == MEMORY_LOAD)
35974 return disp_load;
35975
35976 if (memory == MEMORY_BOTH)
35977 return disp_load_store;
35978
35979 return disp_no_group;
35980 }
35981
35982 /* Return true if insn is a compare instruction. */
35983
35984 static bool
35985 is_cmp (rtx insn)
35986 {
35987 enum attr_type type;
35988
35989 type = get_attr_type (insn);
35990 return (type == TYPE_TEST
35991 || type == TYPE_ICMP
35992 || type == TYPE_FCMP
35993 || GET_CODE (PATTERN (insn)) == COMPARE);
35994 }
35995
35996 /* Return true if a dispatch violation was encountered. */
35997
35998 static bool
35999 dispatch_violation (void)
36000 {
36001 if (dispatch_window_list->next)
36002 return dispatch_window_list->next->violation;
36003 return dispatch_window_list->violation;
36004 }
36005
36006 /* Return true if insn is a branch instruction. */
36007
36008 static bool
36009 is_branch (rtx insn)
36010 {
36011 return (CALL_P (insn) || JUMP_P (insn));
36012 }
36013
36014 /* Return true if insn is a prefetch instruction. */
36015
36016 static bool
36017 is_prefetch (rtx insn)
36018 {
36019 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
36020 }
36021
36022 /* This function initializes a dispatch window and the list container holding a
36023 pointer to the window. */
36024
36025 static void
36026 init_window (int window_num)
36027 {
36028 int i;
36029 dispatch_windows *new_list;
36030
36031 if (window_num == 0)
36032 new_list = dispatch_window_list;
36033 else
36034 new_list = dispatch_window_list1;
36035
36036 new_list->num_insn = 0;
36037 new_list->num_uops = 0;
36038 new_list->window_size = 0;
36039 new_list->next = NULL;
36040 new_list->prev = NULL;
36041 new_list->window_num = window_num;
36042 new_list->num_imm = 0;
36043 new_list->num_imm_32 = 0;
36044 new_list->num_imm_64 = 0;
36045 new_list->imm_size = 0;
36046 new_list->num_loads = 0;
36047 new_list->num_stores = 0;
36048 new_list->violation = false;
36049
36050 for (i = 0; i < MAX_INSN; i++)
36051 {
36052 new_list->window[i].insn = NULL;
36053 new_list->window[i].group = disp_no_group;
36054 new_list->window[i].path = no_path;
36055 new_list->window[i].byte_len = 0;
36056 new_list->window[i].imm_bytes = 0;
36057 }
36058 return;
36059 }
36060
36061 /* This function allocates and initializes a dispatch window and the
36062 list container holding a pointer to the window. */
36063
36064 static dispatch_windows *
36065 allocate_window (void)
36066 {
36067 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
36068 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
36069
36070 return new_list;
36071 }
36072
36073 /* This routine initializes the dispatch scheduling information. It
36074 initiates building dispatch scheduler tables and constructs the
36075 first dispatch window. */
36076
36077 static void
36078 init_dispatch_sched (void)
36079 {
36080 /* Allocate a dispatch list and a window. */
36081 dispatch_window_list = allocate_window ();
36082 dispatch_window_list1 = allocate_window ();
36083 init_window (0);
36084 init_window (1);
36085 }
36086
36087 /* This function returns true if a branch is detected. The end of a basic
36088 block does not have to be a branch, but here we assume only branches end a
36089 window. */
36090
36091 static bool
36092 is_end_basic_block (enum dispatch_group group)
36093 {
36094 return group == disp_branch;
36095 }
36096
36097 /* This function is called when the end of a window's processing is reached. */
36098
36099 static void
36100 process_end_window (void)
36101 {
36102 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
36103 if (dispatch_window_list->next)
36104 {
36105 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
36106 gcc_assert (dispatch_window_list->window_size
36107 + dispatch_window_list1->window_size <= 48);
36108 init_window (1);
36109 }
36110 init_window (0);
36111 }
36112
36113 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
36114 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
36115 for 48 bytes of instructions. Note that these windows are not dispatch
36116 windows whose sizes are DISPATCH_WINDOW_SIZE. */
36117
36118 static dispatch_windows *
36119 allocate_next_window (int window_num)
36120 {
36121 if (window_num == 0)
36122 {
36123 if (dispatch_window_list->next)
36124 init_window (1);
36125 init_window (0);
36126 return dispatch_window_list;
36127 }
36128
36129 dispatch_window_list->next = dispatch_window_list1;
36130 dispatch_window_list1->prev = dispatch_window_list;
36131
36132 return dispatch_window_list1;
36133 }
36134
36135 /* Increment the number of immediate operands of an instruction. */
36136
36137 static int
36138 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
36139 {
36140 if (*in_rtx == 0)
36141 return 0;
36142
36143 switch (GET_CODE (*in_rtx))
36144 {
36145 case CONST:
36146 case SYMBOL_REF:
36147 case CONST_INT:
36148 (imm_values->imm)++;
36149 if (x86_64_immediate_operand (*in_rtx, SImode))
36150 (imm_values->imm32)++;
36151 else
36152 (imm_values->imm64)++;
36153 break;
36154
36155 case CONST_DOUBLE:
36156 (imm_values->imm)++;
36157 (imm_values->imm64)++;
36158 break;
36159
36160 case CODE_LABEL:
36161 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
36162 {
36163 (imm_values->imm)++;
36164 (imm_values->imm32)++;
36165 }
36166 break;
36167
36168 default:
36169 break;
36170 }
36171
36172 return 0;
36173 }
36174
36175 /* Compute number of immediate operands of an instruction. */
36176
36177 static void
36178 find_constant (rtx in_rtx, imm_info *imm_values)
36179 {
36180 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
36181 (rtx_function) find_constant_1, (void *) imm_values);
36182 }
36183
36184 /* Return total size of immediate operands of an instruction along with number
36185 of corresponding immediate operands. It initializes its parameters to zero
36186 before calling FIND_CONSTANT.
36187 INSN is the input instruction. IMM is the total of immediates.
36188 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
36189 bit immediates. */
36190
36191 static int
36192 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
36193 {
36194 imm_info imm_values = {0, 0, 0};
36195
36196 find_constant (insn, &imm_values);
36197 *imm = imm_values.imm;
36198 *imm32 = imm_values.imm32;
36199 *imm64 = imm_values.imm64;
36200 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
36201 }
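
/* Worked example (illustrative): an insn with one 32-bit and one 64-bit
   immediate yields *imm == 2, *imm32 == 1, *imm64 == 1 and a return value
   of 1 * 4 + 1 * 8 == 12 bytes of immediate data.  */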
36202
36203 /* This function indicates whether any operand of instruction INSN is
36204 an immediate. */
36205
36206 static bool
36207 has_immediate (rtx insn)
36208 {
36209 int num_imm_operand;
36210 int num_imm32_operand;
36211 int num_imm64_operand;
36212
36213 if (insn)
36214 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
36215 &num_imm64_operand);
36216 return false;
36217 }
36218
36219 /* Return single or double path for instructions. */
36220
36221 static enum insn_path
36222 get_insn_path (rtx insn)
36223 {
36224 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
36225
36226 if ((int)path == 0)
36227 return path_single;
36228
36229 if ((int)path == 1)
36230 return path_double;
36231
36232 return path_multi;
36233 }
36234
36235 /* Return insn dispatch group. */
36236
36237 static enum dispatch_group
36238 get_insn_group (rtx insn)
36239 {
36240 enum dispatch_group group = get_mem_group (insn);
36241 if (group)
36242 return group;
36243
36244 if (is_branch (insn))
36245 return disp_branch;
36246
36247 if (is_cmp (insn))
36248 return disp_cmp;
36249
36250 if (has_immediate (insn))
36251 return disp_imm;
36252
36253 if (is_prefetch (insn))
36254 return disp_prefetch;
36255
36256 return disp_no_group;
36257 }
36258
36259 /* Count number of GROUP restricted instructions in a dispatch
36260 window WINDOW_LIST. */
36261
36262 static int
36263 count_num_restricted (rtx insn, dispatch_windows *window_list)
36264 {
36265 enum dispatch_group group = get_insn_group (insn);
36266 int imm_size;
36267 int num_imm_operand;
36268 int num_imm32_operand;
36269 int num_imm64_operand;
36270
36271 if (group == disp_no_group)
36272 return 0;
36273
36274 if (group == disp_imm)
36275 {
36276 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
36277 &num_imm64_operand);
36278 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
36279 || num_imm_operand + window_list->num_imm > MAX_IMM
36280 || (num_imm32_operand > 0
36281 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
36282 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
36283 || (num_imm64_operand > 0
36284 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
36285 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
36286 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
36287 && num_imm64_operand > 0
36288 && ((window_list->num_imm_64 > 0
36289 && window_list->num_insn >= 2)
36290 || window_list->num_insn >= 3)))
36291 return BIG;
36292
36293 return 1;
36294 }
36295
36296 if ((group == disp_load_store
36297 && (window_list->num_loads >= MAX_LOAD
36298 || window_list->num_stores >= MAX_STORE))
36299 || ((group == disp_load
36300 || group == disp_prefetch)
36301 && window_list->num_loads >= MAX_LOAD)
36302 || (group == disp_store
36303 && window_list->num_stores >= MAX_STORE))
36304 return BIG;
36305
36306 return 1;
36307 }
36308
36309 /* This function returns true if insn satisfies dispatch rules on the
36310 last window scheduled. */
36311
36312 static bool
36313 fits_dispatch_window (rtx insn)
36314 {
36315 dispatch_windows *window_list = dispatch_window_list;
36316 dispatch_windows *window_list_next = dispatch_window_list->next;
36317 unsigned int num_restrict;
36318 enum dispatch_group group = get_insn_group (insn);
36319 enum insn_path path = get_insn_path (insn);
36320 int sum;
36321
36322 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
36323 instructions should be given the lowest priority in the
36324 scheduling process in the Haifa scheduler to make sure they will be
36325 scheduled in the same dispatch window as the reference to them. */
36326 if (group == disp_jcc || group == disp_cmp)
36327 return false;
36328
36329 /* Check nonrestricted. */
36330 if (group == disp_no_group || group == disp_branch)
36331 return true;
36332
36333 /* Get last dispatch window. */
36334 if (window_list_next)
36335 window_list = window_list_next;
36336
36337 if (window_list->window_num == 1)
36338 {
36339 sum = window_list->prev->window_size + window_list->window_size;
36340
36341 if (sum == 32
36342 || (min_insn_size (insn) + sum) >= 48)
36343 /* Window 1 is full. Go for next window. */
36344 return true;
36345 }
36346
36347 num_restrict = count_num_restricted (insn, window_list);
36348
36349 if (num_restrict > num_allowable_groups[group])
36350 return false;
36351
36352 /* See if it fits in the first window. */
36353 if (window_list->window_num == 0)
36354 {
36355 /* The first window should have only single and double path
36356 uops. */
36357 if (path == path_double
36358 && (window_list->num_uops + 2) > MAX_INSN)
36359 return false;
36360 else if (path != path_single)
36361 return false;
36362 }
36363 return true;
36364 }
36365
36366 /* Add an instruction INSN with NUM_UOPS micro-operations to the
36367 dispatch window WINDOW_LIST. */
36368
36369 static void
36370 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
36371 {
36372 int byte_len = min_insn_size (insn);
36373 int num_insn = window_list->num_insn;
36374 int imm_size;
36375 sched_insn_info *window = window_list->window;
36376 enum dispatch_group group = get_insn_group (insn);
36377 enum insn_path path = get_insn_path (insn);
36378 int num_imm_operand;
36379 int num_imm32_operand;
36380 int num_imm64_operand;
36381
36382 if (!window_list->violation && group != disp_cmp
36383 && !fits_dispatch_window (insn))
36384 window_list->violation = true;
36385
36386 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
36387 &num_imm64_operand);
36388
36389 /* Initialize window with new instruction. */
36390 window[num_insn].insn = insn;
36391 window[num_insn].byte_len = byte_len;
36392 window[num_insn].group = group;
36393 window[num_insn].path = path;
36394 window[num_insn].imm_bytes = imm_size;
36395
36396 window_list->window_size += byte_len;
36397 window_list->num_insn = num_insn + 1;
36398 window_list->num_uops = window_list->num_uops + num_uops;
36399 window_list->imm_size += imm_size;
36400 window_list->num_imm += num_imm_operand;
36401 window_list->num_imm_32 += num_imm32_operand;
36402 window_list->num_imm_64 += num_imm64_operand;
36403
36404 if (group == disp_store)
36405 window_list->num_stores += 1;
36406 else if (group == disp_load
36407 || group == disp_prefetch)
36408 window_list->num_loads += 1;
36409 else if (group == disp_load_store)
36410 {
36411 window_list->num_stores += 1;
36412 window_list->num_loads += 1;
36413 }
36414 }
36415
36416 /* Adds a scheduled instruction, INSN, to the current dispatch window.
36417 If the total bytes of instructions or the number of instructions in
36418 the window exceed the allowed maximum, it allocates a new window. */
36419
36420 static void
36421 add_to_dispatch_window (rtx insn)
36422 {
36423 int byte_len;
36424 dispatch_windows *window_list;
36425 dispatch_windows *next_list;
36426 dispatch_windows *window0_list;
36427 enum insn_path path;
36428 enum dispatch_group insn_group;
36429 bool insn_fits;
36430 int num_insn;
36431 int num_uops;
36432 int window_num;
36433 int insn_num_uops;
36434 int sum;
36435
36436 if (INSN_CODE (insn) < 0)
36437 return;
36438
36439 byte_len = min_insn_size (insn);
36440 window_list = dispatch_window_list;
36441 next_list = window_list->next;
36442 path = get_insn_path (insn);
36443 insn_group = get_insn_group (insn);
36444
36445 /* Get the last dispatch window. */
36446 if (next_list)
36447 window_list = dispatch_window_list->next;
36448
36449 if (path == path_single)
36450 insn_num_uops = 1;
36451 else if (path == path_double)
36452 insn_num_uops = 2;
36453 else
36454 insn_num_uops = (int) path;
36455
36456 /* If current window is full, get a new window.
36457 Window number zero is full if MAX_INSN uops are scheduled in it.
36458 Window number one is full if window zero's bytes plus window
36459 one's bytes equal 32, or if adding the bytes of the new instruction
36460 to the total makes it greater than or equal to 48, or if it already
36461 has MAX_INSN instructions in it. */
36462 num_insn = window_list->num_insn;
36463 num_uops = window_list->num_uops;
36464 window_num = window_list->window_num;
36465 insn_fits = fits_dispatch_window (insn);
36466
36467 if (num_insn >= MAX_INSN
36468 || num_uops + insn_num_uops > MAX_INSN
36469 || !(insn_fits))
36470 {
36471 window_num = ~window_num & 1;
36472 window_list = allocate_next_window (window_num);
36473 }
36474
36475 if (window_num == 0)
36476 {
36477 add_insn_window (insn, window_list, insn_num_uops);
36478 if (window_list->num_insn >= MAX_INSN
36479 && insn_group == disp_branch)
36480 {
36481 process_end_window ();
36482 return;
36483 }
36484 }
36485 else if (window_num == 1)
36486 {
36487 window0_list = window_list->prev;
36488 sum = window0_list->window_size + window_list->window_size;
36489 if (sum == 32
36490 || (byte_len + sum) >= 48)
36491 {
36492 process_end_window ();
36493 window_list = dispatch_window_list;
36494 }
36495
36496 add_insn_window (insn, window_list, insn_num_uops);
36497 }
36498 else
36499 gcc_unreachable ();
36500
36501 if (is_end_basic_block (insn_group))
36502 {
36503 /* End of basic block is reached; do end-basic-block processing. */
36504 process_end_window ();
36505 return;
36506 }
36507 }
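
/* Worked example (illustrative): if window 0 already holds 14 bytes and
   window 1 holds 18 bytes, sum == 32 and the pair is flushed through
   process_end_window; likewise when byte_len + sum reaches 48 or more.
   Either way scheduling continues in a freshly initialized window 0.  */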
36508
36509 /* Print the dispatch window, WINDOW_NUM, to FILE. */
36510
36511 DEBUG_FUNCTION static void
36512 debug_dispatch_window_file (FILE *file, int window_num)
36513 {
36514 dispatch_windows *list;
36515 int i;
36516
36517 if (window_num == 0)
36518 list = dispatch_window_list;
36519 else
36520 list = dispatch_window_list1;
36521
36522 fprintf (file, "Window #%d:\n", list->window_num);
36523 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
36524 list->num_insn, list->num_uops, list->window_size);
36525 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
36526 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
36527
36528 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
36529 list->num_stores);
36530 fprintf (file, " insn info:\n");
36531
36532 for (i = 0; i < MAX_INSN; i++)
36533 {
36534 if (!list->window[i].insn)
36535 break;
36536 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
36537 i, group_name[list->window[i].group],
36538 i, (void *)list->window[i].insn,
36539 i, list->window[i].path,
36540 i, list->window[i].byte_len,
36541 i, list->window[i].imm_bytes);
36542 }
36543 }
36544
36545 /* Print to stdout a dispatch window. */
36546
36547 DEBUG_FUNCTION void
36548 debug_dispatch_window (int window_num)
36549 {
36550 debug_dispatch_window_file (stdout, window_num);
36551 }
36552
36553 /* Print INSN dispatch information to FILE. */
36554
36555 DEBUG_FUNCTION static void
36556 debug_insn_dispatch_info_file (FILE *file, rtx insn)
36557 {
36558 int byte_len;
36559 enum insn_path path;
36560 enum dispatch_group group;
36561 int imm_size;
36562 int num_imm_operand;
36563 int num_imm32_operand;
36564 int num_imm64_operand;
36565
36566 if (INSN_CODE (insn) < 0)
36567 return;
36568
36569 byte_len = min_insn_size (insn);
36570 path = get_insn_path (insn);
36571 group = get_insn_group (insn);
36572 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
36573 &num_imm64_operand);
36574
36575 fprintf (file, " insn info:\n");
36576 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
36577 group_name[group], path, byte_len);
36578 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
36579 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
36580 }
36581
36582 /* Print to STDOUT the status of the ready list with respect to
36583 dispatch windows. */
36584
36585 DEBUG_FUNCTION void
36586 debug_ready_dispatch (void)
36587 {
36588 int i;
36589 int no_ready = number_in_ready ();
36590
36591 fprintf (stdout, "Number of ready: %d\n", no_ready);
36592
36593 for (i = 0; i < no_ready; i++)
36594 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
36595 }
36596
36597 /* This routine is the driver of the dispatch scheduler. */
36598
36599 static void
36600 do_dispatch (rtx insn, int mode)
36601 {
36602 if (mode == DISPATCH_INIT)
36603 init_dispatch_sched ();
36604 else if (mode == ADD_TO_DISPATCH_WINDOW)
36605 add_to_dispatch_window (insn);
36606 }
36607
36608 /* Return TRUE if Dispatch Scheduling is supported. */
36609
36610 static bool
36611 has_dispatch (rtx insn, int action)
36612 {
36613 if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
36614 && flag_dispatch_scheduler)
36615 switch (action)
36616 {
36617 default:
36618 return false;
36619
36620 case IS_DISPATCH_ON:
36621 return true;
36622 break;
36623
36624 case IS_CMP:
36625 return is_cmp (insn);
36626
36627 case DISPATCH_VIOLATION:
36628 return dispatch_violation ();
36629
36630 case FITS_DISPATCH_WINDOW:
36631 return fits_dispatch_window (insn);
36632 }
36633
36634 return false;
36635 }
36636
36637 /* Implementation of reassociation_width target hook used by
36638 reassoc phase to identify parallelism level in reassociated
36639 tree. The statement's tree_code is passed in OPC. The argument's
36640 type is passed in MODE.
36641
36642 Currently parallel reassociation is enabled for Atom
36643 processors only and we set reassociation width to be 2
36644 because Atom may issue up to 2 instructions per cycle.
36645
36646 Return value should be fixed if parallel reassociation is
36647 enabled for other processors. */
36648
36649 static int
36650 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
36651 enum machine_mode mode)
36652 {
36653 int res = 1;
36654
36655 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
36656 res = 2;
36657 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
36658 res = 2;
36659
36660 return res;
36661 }
36662
36663 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
36664 place emms and femms instructions. */
36665
36666 static enum machine_mode
36667 ix86_preferred_simd_mode (enum machine_mode mode)
36668 {
36669 if (!TARGET_SSE)
36670 return word_mode;
36671
36672 switch (mode)
36673 {
36674 case QImode:
36675 return TARGET_AVX2 ? V32QImode : V16QImode;
36676 case HImode:
36677 return TARGET_AVX2 ? V16HImode : V8HImode;
36678 case SImode:
36679 return TARGET_AVX2 ? V8SImode : V4SImode;
36680 case DImode:
36681 return TARGET_AVX2 ? V4DImode : V2DImode;
36682
36683 case SFmode:
36684 if (TARGET_AVX && !TARGET_PREFER_AVX128)
36685 return V8SFmode;
36686 else
36687 return V4SFmode;
36688
36689 case DFmode:
36690 if (!TARGET_VECTORIZE_DOUBLE)
36691 return word_mode;
36692 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
36693 return V4DFmode;
36694 else if (TARGET_SSE2)
36695 return V2DFmode;
36696 /* FALLTHRU */
36697
36698 default:
36699 return word_mode;
36700 }
36701 }
36702
36703 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
36704 vectors. */
36705
36706 static unsigned int
36707 ix86_autovectorize_vector_sizes (void)
36708 {
36709 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
36710 }
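
/* Worked example (illustrative): with AVX enabled and 128-bit vectors not
   preferred this returns 32 | 16 == 48, a bitmask asking the vectorizer to
   consider both 32-byte and 16-byte vectors; a return of 0 leaves only the
   preferred SIMD mode to be tried.  */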
36711
36712 /* Initialize the GCC target structure. */
36713 #undef TARGET_RETURN_IN_MEMORY
36714 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
36715
36716 #undef TARGET_LEGITIMIZE_ADDRESS
36717 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
36718
36719 #undef TARGET_ATTRIBUTE_TABLE
36720 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
36721 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
36722 # undef TARGET_MERGE_DECL_ATTRIBUTES
36723 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
36724 #endif
36725
36726 #undef TARGET_COMP_TYPE_ATTRIBUTES
36727 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
36728
36729 #undef TARGET_INIT_BUILTINS
36730 #define TARGET_INIT_BUILTINS ix86_init_builtins
36731 #undef TARGET_BUILTIN_DECL
36732 #define TARGET_BUILTIN_DECL ix86_builtin_decl
36733 #undef TARGET_EXPAND_BUILTIN
36734 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
36735
36736 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
36737 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
36738 ix86_builtin_vectorized_function
36739
36740 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
36741 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
36742
36743 #undef TARGET_BUILTIN_RECIPROCAL
36744 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
36745
36746 #undef TARGET_ASM_FUNCTION_EPILOGUE
36747 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
36748
36749 #undef TARGET_ENCODE_SECTION_INFO
36750 #ifndef SUBTARGET_ENCODE_SECTION_INFO
36751 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
36752 #else
36753 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
36754 #endif
36755
36756 #undef TARGET_ASM_OPEN_PAREN
36757 #define TARGET_ASM_OPEN_PAREN ""
36758 #undef TARGET_ASM_CLOSE_PAREN
36759 #define TARGET_ASM_CLOSE_PAREN ""
36760
36761 #undef TARGET_ASM_BYTE_OP
36762 #define TARGET_ASM_BYTE_OP ASM_BYTE
36763
36764 #undef TARGET_ASM_ALIGNED_HI_OP
36765 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
36766 #undef TARGET_ASM_ALIGNED_SI_OP
36767 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
36768 #ifdef ASM_QUAD
36769 #undef TARGET_ASM_ALIGNED_DI_OP
36770 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
36771 #endif
36772
36773 #undef TARGET_PROFILE_BEFORE_PROLOGUE
36774 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
36775
36776 #undef TARGET_ASM_UNALIGNED_HI_OP
36777 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
36778 #undef TARGET_ASM_UNALIGNED_SI_OP
36779 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
36780 #undef TARGET_ASM_UNALIGNED_DI_OP
36781 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
36782
36783 #undef TARGET_PRINT_OPERAND
36784 #define TARGET_PRINT_OPERAND ix86_print_operand
36785 #undef TARGET_PRINT_OPERAND_ADDRESS
36786 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
36787 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
36788 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
36789 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
36790 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
36791
36792 #undef TARGET_SCHED_INIT_GLOBAL
36793 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
36794 #undef TARGET_SCHED_ADJUST_COST
36795 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
36796 #undef TARGET_SCHED_ISSUE_RATE
36797 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
36798 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
36799 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
36800 ia32_multipass_dfa_lookahead
36801
36802 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
36803 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
36804
36805 #ifdef HAVE_AS_TLS
36806 #undef TARGET_HAVE_TLS
36807 #define TARGET_HAVE_TLS true
36808 #endif
36809 #undef TARGET_CANNOT_FORCE_CONST_MEM
36810 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
36811 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
36812 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
36813
36814 #undef TARGET_DELEGITIMIZE_ADDRESS
36815 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
36816
36817 #undef TARGET_MS_BITFIELD_LAYOUT_P
36818 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
36819
36820 #if TARGET_MACHO
36821 #undef TARGET_BINDS_LOCAL_P
36822 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
36823 #endif
36824 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
36825 #undef TARGET_BINDS_LOCAL_P
36826 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
36827 #endif
36828
36829 #undef TARGET_ASM_OUTPUT_MI_THUNK
36830 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
36831 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
36832 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
36833
36834 #undef TARGET_ASM_FILE_START
36835 #define TARGET_ASM_FILE_START x86_file_start
36836
36837 #undef TARGET_OPTION_OVERRIDE
36838 #define TARGET_OPTION_OVERRIDE ix86_option_override
36839
36840 #undef TARGET_REGISTER_MOVE_COST
36841 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
36842 #undef TARGET_MEMORY_MOVE_COST
36843 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
36844 #undef TARGET_RTX_COSTS
36845 #define TARGET_RTX_COSTS ix86_rtx_costs
36846 #undef TARGET_ADDRESS_COST
36847 #define TARGET_ADDRESS_COST ix86_address_cost
36848
36849 #undef TARGET_FIXED_CONDITION_CODE_REGS
36850 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
36851 #undef TARGET_CC_MODES_COMPATIBLE
36852 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
36853
36854 #undef TARGET_MACHINE_DEPENDENT_REORG
36855 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
36856
36857 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
36858 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
36859
36860 #undef TARGET_BUILD_BUILTIN_VA_LIST
36861 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
36862
36863 #undef TARGET_ENUM_VA_LIST_P
36864 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
36865
36866 #undef TARGET_FN_ABI_VA_LIST
36867 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
36868
36869 #undef TARGET_CANONICAL_VA_LIST_TYPE
36870 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
36871
36872 #undef TARGET_EXPAND_BUILTIN_VA_START
36873 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
36874
36875 #undef TARGET_MD_ASM_CLOBBERS
36876 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
36877
36878 #undef TARGET_PROMOTE_PROTOTYPES
36879 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
36880 #undef TARGET_STRUCT_VALUE_RTX
36881 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
36882 #undef TARGET_SETUP_INCOMING_VARARGS
36883 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
36884 #undef TARGET_MUST_PASS_IN_STACK
36885 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
36886 #undef TARGET_FUNCTION_ARG_ADVANCE
36887 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
36888 #undef TARGET_FUNCTION_ARG
36889 #define TARGET_FUNCTION_ARG ix86_function_arg
36890 #undef TARGET_FUNCTION_ARG_BOUNDARY
36891 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
36892 #undef TARGET_PASS_BY_REFERENCE
36893 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
36894 #undef TARGET_INTERNAL_ARG_POINTER
36895 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
36896 #undef TARGET_UPDATE_STACK_BOUNDARY
36897 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
36898 #undef TARGET_GET_DRAP_RTX
36899 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
36900 #undef TARGET_STRICT_ARGUMENT_NAMING
36901 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
36902 #undef TARGET_STATIC_CHAIN
36903 #define TARGET_STATIC_CHAIN ix86_static_chain
36904 #undef TARGET_TRAMPOLINE_INIT
36905 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
36906 #undef TARGET_RETURN_POPS_ARGS
36907 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
36908
36909 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
36910 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
36911
36912 #undef TARGET_SCALAR_MODE_SUPPORTED_P
36913 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
36914
36915 #undef TARGET_VECTOR_MODE_SUPPORTED_P
36916 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
36917
36918 #undef TARGET_C_MODE_FOR_SUFFIX
36919 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
36920
36921 #ifdef HAVE_AS_TLS
36922 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
36923 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
36924 #endif
36925
36926 #ifdef SUBTARGET_INSERT_ATTRIBUTES
36927 #undef TARGET_INSERT_ATTRIBUTES
36928 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
36929 #endif
36930
36931 #undef TARGET_MANGLE_TYPE
36932 #define TARGET_MANGLE_TYPE ix86_mangle_type
36933
36934 #ifndef TARGET_MACHO
36935 #undef TARGET_STACK_PROTECT_FAIL
36936 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
36937 #endif
36938
36939 #undef TARGET_FUNCTION_VALUE
36940 #define TARGET_FUNCTION_VALUE ix86_function_value
36941
36942 #undef TARGET_FUNCTION_VALUE_REGNO_P
36943 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
36944
36945 #undef TARGET_PROMOTE_FUNCTION_MODE
36946 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
36947
36948 #undef TARGET_SECONDARY_RELOAD
36949 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
36950
36951 #undef TARGET_CLASS_MAX_NREGS
36952 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
36953
36954 #undef TARGET_PREFERRED_RELOAD_CLASS
36955 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
36956 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
36957 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
36958 #undef TARGET_CLASS_LIKELY_SPILLED_P
36959 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
36960
36961 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
36962 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
36963 ix86_builtin_vectorization_cost
36964 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
36965 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
36966 ix86_vectorize_builtin_vec_perm
36967 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
36968 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
36969 ix86_vectorize_builtin_vec_perm_ok
36970 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
36971 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
36972 ix86_preferred_simd_mode
36973 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
36974 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
36975 ix86_autovectorize_vector_sizes
36976
36977 #undef TARGET_SET_CURRENT_FUNCTION
36978 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
36979
36980 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
36981 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
36982
36983 #undef TARGET_OPTION_SAVE
36984 #define TARGET_OPTION_SAVE ix86_function_specific_save
36985
36986 #undef TARGET_OPTION_RESTORE
36987 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
36988
36989 #undef TARGET_OPTION_PRINT
36990 #define TARGET_OPTION_PRINT ix86_function_specific_print
36991
36992 #undef TARGET_CAN_INLINE_P
36993 #define TARGET_CAN_INLINE_P ix86_can_inline_p
36994
36995 #undef TARGET_EXPAND_TO_RTL_HOOK
36996 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
36997
36998 #undef TARGET_LEGITIMATE_ADDRESS_P
36999 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
37000
37001 #undef TARGET_LEGITIMATE_CONSTANT_P
37002 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
37003
37004 #undef TARGET_FRAME_POINTER_REQUIRED
37005 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
37006
37007 #undef TARGET_CAN_ELIMINATE
37008 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
37009
37010 #undef TARGET_EXTRA_LIVE_ON_ENTRY
37011 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
37012
37013 #undef TARGET_ASM_CODE_END
37014 #define TARGET_ASM_CODE_END ix86_code_end
37015
37016 #undef TARGET_CONDITIONAL_REGISTER_USAGE
37017 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
37018
37019 #if TARGET_MACHO
37020 #undef TARGET_INIT_LIBFUNCS
37021 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
37022 #endif
37023
37024 struct gcc_target targetm = TARGET_INITIALIZER;
37025 \f
37026 #include "gt-i386.h"