gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "diagnostic-core.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "gimple.h"
51 #include "dwarf2.h"
52 #include "df.h"
53 #include "tm-constrs.h"
54 #include "params.h"
55 #include "cselib.h"
56 #include "debug.h"
57 #include "dwarf2out.h"
58 #include "sched-int.h"
59 #include "sbitmap.h"
60 #include "fibheap.h"
61 #include "opts.h"
62
63 enum upper_128bits_state
64 {
65 unknown = 0,
66 unused,
67 used
68 };
69
70 typedef struct block_info_def
71 {
72 /* State of the upper 128bits of AVX registers at exit. */
73 enum upper_128bits_state state;
74 /* TRUE if state of the upper 128bits of AVX registers is unchanged
75 in this block. */
76 bool unchanged;
77 /* TRUE if block has been processed. */
78 bool processed;
79 /* TRUE if block has been scanned. */
80 bool scanned;
81 /* Previous state of the upper 128bits of AVX registers at entry. */
82 enum upper_128bits_state prev;
83 } *block_info;
84
85 #define BLOCK_INFO(B) ((block_info) (B)->aux)
86
87 enum call_avx256_state
88 {
89 /* Callee returns 256bit AVX register. */
90 callee_return_avx256 = -1,
91 /* Callee returns and passes 256bit AVX register. */
92 callee_return_pass_avx256,
93 /* Callee passes 256bit AVX register. */
94 callee_pass_avx256,
  95	  /* Callee neither returns nor passes a 256bit AVX register, or no
  96	     256bit AVX register is involved in the function return.  */
97 call_no_avx256,
98 /* vzeroupper intrinsic. */
99 vzeroupper_intrinsic
100 };
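/* As the scan in move_or_delete_vzeroupper_2 below shows, one of these
   values is encoded as the first operand of the vzeroupper UNSPEC_VOLATILE
   and is recovered via INTVAL (XVECEXP (pat, 0, 0)) when the intrinsic is
   processed.  */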
101
102 /* Check if a 256bit AVX register is referenced in stores. */
103
104 static void
105 check_avx256_stores (rtx dest, const_rtx set, void *data)
106 {
107 if ((REG_P (dest)
108 && VALID_AVX256_REG_MODE (GET_MODE (dest)))
109 || (GET_CODE (set) == SET
110 && REG_P (SET_SRC (set))
111 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
112 {
113 enum upper_128bits_state *state
114 = (enum upper_128bits_state *) data;
115 *state = used;
116 }
117 }
118
119 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
120 in basic block BB. Delete it if upper 128bit AVX registers are
 121	   unused.  If it isn't deleted, move it to just before a jump or call insn.
122
123 STATE is state of the upper 128bits of AVX registers at entry. */
124
125 static void
126 move_or_delete_vzeroupper_2 (basic_block bb,
127 enum upper_128bits_state state)
128 {
129 rtx insn, bb_end;
130 rtx vzeroupper_insn = NULL_RTX;
131 rtx pat;
132 int avx256;
133 bool unchanged;
134
135 if (BLOCK_INFO (bb)->unchanged)
136 {
137 if (dump_file)
138 fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
139 bb->index, state);
140
141 BLOCK_INFO (bb)->state = state;
142 return;
143 }
144
145 if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
146 {
147 if (dump_file)
148 fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
149 bb->index, BLOCK_INFO (bb)->state);
150 return;
151 }
152
153 BLOCK_INFO (bb)->prev = state;
154
155 if (dump_file)
156 fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
157 bb->index, state);
158
159 unchanged = true;
160
161 /* BB_END changes when it is deleted. */
162 bb_end = BB_END (bb);
163 insn = BB_HEAD (bb);
164 while (insn != bb_end)
165 {
166 insn = NEXT_INSN (insn);
167
168 if (!NONDEBUG_INSN_P (insn))
169 continue;
170
171 /* Move vzeroupper before jump/call. */
172 if (JUMP_P (insn) || CALL_P (insn))
173 {
174 if (!vzeroupper_insn)
175 continue;
176
177 if (PREV_INSN (insn) != vzeroupper_insn)
178 {
179 if (dump_file)
180 {
181 fprintf (dump_file, "Move vzeroupper after:\n");
182 print_rtl_single (dump_file, PREV_INSN (insn));
183 fprintf (dump_file, "before:\n");
184 print_rtl_single (dump_file, insn);
185 }
186 reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
187 PREV_INSN (insn));
188 }
189 vzeroupper_insn = NULL_RTX;
190 continue;
191 }
192
193 pat = PATTERN (insn);
194
195 /* Check insn for vzeroupper intrinsic. */
196 if (GET_CODE (pat) == UNSPEC_VOLATILE
197 && XINT (pat, 1) == UNSPECV_VZEROUPPER)
198 {
199 if (dump_file)
200 {
201 /* Found vzeroupper intrinsic. */
202 fprintf (dump_file, "Found vzeroupper:\n");
203 print_rtl_single (dump_file, insn);
204 }
205 }
206 else
207 {
208 /* Check insn for vzeroall intrinsic. */
209 if (GET_CODE (pat) == PARALLEL
210 && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
211 && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
212 {
213 state = unused;
214 unchanged = false;
215
216 /* Delete pending vzeroupper insertion. */
217 if (vzeroupper_insn)
218 {
219 delete_insn (vzeroupper_insn);
220 vzeroupper_insn = NULL_RTX;
221 }
222 }
223 else if (state != used)
224 {
225 note_stores (pat, check_avx256_stores, &state);
226 if (state == used)
227 unchanged = false;
228 }
229 continue;
230 }
231
232 /* Process vzeroupper intrinsic. */
233 avx256 = INTVAL (XVECEXP (pat, 0, 0));
234
235 if (state == unused)
236 {
237 /* Since the upper 128bits are cleared, callee must not pass
238 256bit AVX register. We only need to check if callee
239 returns 256bit AVX register. */
240 if (avx256 == callee_return_avx256)
241 {
242 state = used;
243 unchanged = false;
244 }
245
246 /* Remove unnecessary vzeroupper since upper 128bits are
247 cleared. */
248 if (dump_file)
249 {
250 fprintf (dump_file, "Delete redundant vzeroupper:\n");
251 print_rtl_single (dump_file, insn);
252 }
253 delete_insn (insn);
254 }
255 else
256 {
257 /* Set state to UNUSED if callee doesn't return 256bit AVX
258 register. */
259 if (avx256 != callee_return_pass_avx256)
260 state = unused;
261
262 if (avx256 == callee_return_pass_avx256
263 || avx256 == callee_pass_avx256)
264 {
265 /* Must remove vzeroupper since callee passes in 256bit
266 AVX register. */
267 if (dump_file)
268 {
269 fprintf (dump_file, "Delete callee pass vzeroupper:\n");
270 print_rtl_single (dump_file, insn);
271 }
272 delete_insn (insn);
273 }
274 else
275 {
276 vzeroupper_insn = insn;
277 unchanged = false;
278 }
279 }
280 }
281
282 BLOCK_INFO (bb)->state = state;
283 BLOCK_INFO (bb)->unchanged = unchanged;
284 BLOCK_INFO (bb)->scanned = true;
285
286 if (dump_file)
287 fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
288 bb->index, unchanged ? "unchanged" : "changed",
289 state);
290 }
291
292 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
293 in BLOCK and check its predecessor blocks. Treat UNKNOWN state
 294	   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
295 state is changed. */
296
297 static bool
298 move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
299 {
300 edge e;
301 edge_iterator ei;
302 enum upper_128bits_state state, old_state, new_state;
303 bool seen_unknown;
304
305 if (dump_file)
306 fprintf (dump_file, " Process [bb %i]: status: %d\n",
307 block->index, BLOCK_INFO (block)->processed);
308
309 if (BLOCK_INFO (block)->processed)
310 return false;
311
312 state = unused;
313
314 /* Check all predecessor edges of this block. */
315 seen_unknown = false;
316 FOR_EACH_EDGE (e, ei, block->preds)
317 {
318 if (e->src == block)
319 continue;
320 switch (BLOCK_INFO (e->src)->state)
321 {
322 case unknown:
323 if (!unknown_is_unused)
324 seen_unknown = true;
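	  /* Fall through: when UNKNOWN_IS_UNUSED, or once SEEN_UNKNOWN has
	     been noted, an unknown predecessor is handled here like an
	     unused one.  */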
325 case unused:
326 break;
327 case used:
328 state = used;
329 goto done;
330 }
331 }
332
333 if (seen_unknown)
334 state = unknown;
335
336 done:
337 old_state = BLOCK_INFO (block)->state;
338 move_or_delete_vzeroupper_2 (block, state);
339 new_state = BLOCK_INFO (block)->state;
340
341 if (state != unknown || new_state == used)
342 BLOCK_INFO (block)->processed = true;
343
344 /* Need to rescan if the upper 128bits of AVX registers are changed
345 to USED at exit. */
346 if (new_state != old_state)
347 {
348 if (new_state == used)
349 cfun->machine->rescan_vzeroupper_p = 1;
350 return true;
351 }
352 else
353 return false;
354 }
355
356 /* Go through the instruction stream looking for vzeroupper. Delete
357 it if upper 128bit AVX registers are unused. If it isn't deleted,
 358	   move it to just before a jump or call insn.  */
359
360 static void
361 move_or_delete_vzeroupper (void)
362 {
363 edge e;
364 edge_iterator ei;
365 basic_block bb;
366 fibheap_t worklist, pending, fibheap_swap;
367 sbitmap visited, in_worklist, in_pending, sbitmap_swap;
368 int *bb_order;
369 int *rc_order;
370 int i;
371
372 /* Set up block info for each basic block. */
373 alloc_aux_for_blocks (sizeof (struct block_info_def));
374
375 /* Process outgoing edges of entry point. */
376 if (dump_file)
377 fprintf (dump_file, "Process outgoing edges of entry point\n");
378
379 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
380 {
381 move_or_delete_vzeroupper_2 (e->dest,
382 cfun->machine->caller_pass_avx256_p
383 ? used : unused);
384 BLOCK_INFO (e->dest)->processed = true;
385 }
386
387 /* Compute reverse completion order of depth first search of the CFG
388 so that the data-flow runs faster. */
389 rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
390 bb_order = XNEWVEC (int, last_basic_block);
391 pre_and_rev_post_order_compute (NULL, rc_order, false);
392 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
393 bb_order[rc_order[i]] = i;
394 free (rc_order);
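  /* BB_ORDER now maps a basic block index to its position in the reverse
     completion order computed above; it is used below as the fibheap key so
     that blocks are extracted in that order.  */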
395
396 worklist = fibheap_new ();
397 pending = fibheap_new ();
398 visited = sbitmap_alloc (last_basic_block);
399 in_worklist = sbitmap_alloc (last_basic_block);
400 in_pending = sbitmap_alloc (last_basic_block);
401 sbitmap_zero (in_worklist);
402
403 /* Don't check outgoing edges of entry point. */
404 sbitmap_ones (in_pending);
405 FOR_EACH_BB (bb)
406 if (BLOCK_INFO (bb)->processed)
407 RESET_BIT (in_pending, bb->index);
408 else
409 {
410 move_or_delete_vzeroupper_1 (bb, false);
411 fibheap_insert (pending, bb_order[bb->index], bb);
412 }
413
414 if (dump_file)
415 fprintf (dump_file, "Check remaining basic blocks\n");
416
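  /* Iterate to a fixed point: WORKLIST holds the blocks of the current
     round and PENDING the blocks queued for the next one; the two heaps and
     their membership bitmaps are swapped at the top of each round, and
     rounds continue while rescan_vzeroupper_p is set.  */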
417 while (!fibheap_empty (pending))
418 {
419 fibheap_swap = pending;
420 pending = worklist;
421 worklist = fibheap_swap;
422 sbitmap_swap = in_pending;
423 in_pending = in_worklist;
424 in_worklist = sbitmap_swap;
425
426 sbitmap_zero (visited);
427
428 cfun->machine->rescan_vzeroupper_p = 0;
429
430 while (!fibheap_empty (worklist))
431 {
432 bb = (basic_block) fibheap_extract_min (worklist);
433 RESET_BIT (in_worklist, bb->index);
434 gcc_assert (!TEST_BIT (visited, bb->index));
435 if (!TEST_BIT (visited, bb->index))
436 {
437 edge_iterator ei;
438
439 SET_BIT (visited, bb->index);
440
441 if (move_or_delete_vzeroupper_1 (bb, false))
442 FOR_EACH_EDGE (e, ei, bb->succs)
443 {
444 if (e->dest == EXIT_BLOCK_PTR
445 || BLOCK_INFO (e->dest)->processed)
446 continue;
447
448 if (TEST_BIT (visited, e->dest->index))
449 {
450 if (!TEST_BIT (in_pending, e->dest->index))
451 {
452 /* Send E->DEST to next round. */
453 SET_BIT (in_pending, e->dest->index);
454 fibheap_insert (pending,
455 bb_order[e->dest->index],
456 e->dest);
457 }
458 }
459 else if (!TEST_BIT (in_worklist, e->dest->index))
460 {
461 /* Add E->DEST to current round. */
462 SET_BIT (in_worklist, e->dest->index);
463 fibheap_insert (worklist, bb_order[e->dest->index],
464 e->dest);
465 }
466 }
467 }
468 }
469
470 if (!cfun->machine->rescan_vzeroupper_p)
471 break;
472 }
473
474 free (bb_order);
475 fibheap_delete (worklist);
476 fibheap_delete (pending);
477 sbitmap_free (visited);
478 sbitmap_free (in_worklist);
479 sbitmap_free (in_pending);
480
481 if (dump_file)
482 fprintf (dump_file, "Process remaining basic blocks\n");
483
484 FOR_EACH_BB (bb)
485 move_or_delete_vzeroupper_1 (bb, true);
486
487 free_aux_for_blocks ();
488 }
489
490 static rtx legitimize_dllimport_symbol (rtx, bool);
491
492 #ifndef CHECK_STACK_LIMIT
493 #define CHECK_STACK_LIMIT (-1)
494 #endif
495
496 /* Return index of given mode in mult and division cost tables. */
497 #define MODE_INDEX(mode) \
498 ((mode) == QImode ? 0 \
499 : (mode) == HImode ? 1 \
500 : (mode) == SImode ? 2 \
501 : (mode) == DImode ? 3 \
502 : 4)
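/* For example, MODE_INDEX (SImode) is 2, selecting the SImode entry of the
   five-element multiply and divide cost arrays in the processor_costs
   tables below.  */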
503
504 /* Processor costs (relative to an add) */
505 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
506 #define COSTS_N_BYTES(N) ((N) * 2)
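/* With COSTS_N_INSNS (N) assumed to be (N) * 4 as noted above, a plain add
   costs COSTS_N_INSNS (1) == 4 in the speed tables and COSTS_N_BYTES (2) == 4
   in the size table below, so both scales agree on the 2-byte addition as
   the baseline.  */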
507
508 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
509
510 const
511 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
512 COSTS_N_BYTES (2), /* cost of an add instruction */
513 COSTS_N_BYTES (3), /* cost of a lea instruction */
514 COSTS_N_BYTES (2), /* variable shift costs */
515 COSTS_N_BYTES (3), /* constant shift costs */
516 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
517 COSTS_N_BYTES (3), /* HI */
518 COSTS_N_BYTES (3), /* SI */
519 COSTS_N_BYTES (3), /* DI */
520 COSTS_N_BYTES (5)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
523 COSTS_N_BYTES (3), /* HI */
524 COSTS_N_BYTES (3), /* SI */
525 COSTS_N_BYTES (3), /* DI */
526 COSTS_N_BYTES (5)}, /* other */
527 COSTS_N_BYTES (3), /* cost of movsx */
528 COSTS_N_BYTES (3), /* cost of movzx */
529 0, /* "large" insn */
530 2, /* MOVE_RATIO */
531 2, /* cost for loading QImode using movzbl */
532 {2, 2, 2}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 2, 2}, /* cost of storing integer registers */
536 2, /* cost of reg,reg fld/fst */
537 {2, 2, 2}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {2, 2, 2}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 3, /* cost of moving MMX register */
542 {3, 3}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {3, 3}, /* cost of storing MMX registers
545 in SImode and DImode */
546 3, /* cost of moving SSE register */
547 {3, 3, 3}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {3, 3, 3}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 3, /* MMX or SSE register to integer */
552 0, /* size of l1 cache */
553 0, /* size of l2 cache */
554 0, /* size of prefetch block */
555 0, /* number of parallel prefetches */
556 2, /* Branch cost */
557 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
558 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
559 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
560 COSTS_N_BYTES (2), /* cost of FABS instruction. */
561 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
562 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
563 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
564 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
565 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
566 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
567 1, /* scalar_stmt_cost. */
568 1, /* scalar load_cost. */
569 1, /* scalar_store_cost. */
570 1, /* vec_stmt_cost. */
571 1, /* vec_to_scalar_cost. */
572 1, /* scalar_to_vec_cost. */
573 1, /* vec_align_load_cost. */
574 1, /* vec_unalign_load_cost. */
575 1, /* vec_store_cost. */
576 1, /* cond_taken_branch_cost. */
577 1, /* cond_not_taken_branch_cost. */
578 };
579
580 /* Processor costs (relative to an add) */
581 static const
582 struct processor_costs i386_cost = { /* 386 specific costs */
583 COSTS_N_INSNS (1), /* cost of an add instruction */
584 COSTS_N_INSNS (1), /* cost of a lea instruction */
585 COSTS_N_INSNS (3), /* variable shift costs */
586 COSTS_N_INSNS (2), /* constant shift costs */
587 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
588 COSTS_N_INSNS (6), /* HI */
589 COSTS_N_INSNS (6), /* SI */
590 COSTS_N_INSNS (6), /* DI */
591 COSTS_N_INSNS (6)}, /* other */
592 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
593 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
594 COSTS_N_INSNS (23), /* HI */
595 COSTS_N_INSNS (23), /* SI */
596 COSTS_N_INSNS (23), /* DI */
597 COSTS_N_INSNS (23)}, /* other */
598 COSTS_N_INSNS (3), /* cost of movsx */
599 COSTS_N_INSNS (2), /* cost of movzx */
600 15, /* "large" insn */
601 3, /* MOVE_RATIO */
602 4, /* cost for loading QImode using movzbl */
603 {2, 4, 2}, /* cost of loading integer registers
604 in QImode, HImode and SImode.
605 Relative to reg-reg move (2). */
606 {2, 4, 2}, /* cost of storing integer registers */
607 2, /* cost of reg,reg fld/fst */
608 {8, 8, 8}, /* cost of loading fp registers
609 in SFmode, DFmode and XFmode */
610 {8, 8, 8}, /* cost of storing fp registers
611 in SFmode, DFmode and XFmode */
612 2, /* cost of moving MMX register */
613 {4, 8}, /* cost of loading MMX registers
614 in SImode and DImode */
615 {4, 8}, /* cost of storing MMX registers
616 in SImode and DImode */
617 2, /* cost of moving SSE register */
618 {4, 8, 16}, /* cost of loading SSE registers
619 in SImode, DImode and TImode */
620 {4, 8, 16}, /* cost of storing SSE registers
621 in SImode, DImode and TImode */
622 3, /* MMX or SSE register to integer */
623 0, /* size of l1 cache */
624 0, /* size of l2 cache */
625 0, /* size of prefetch block */
626 0, /* number of parallel prefetches */
627 1, /* Branch cost */
628 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
629 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
630 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
631 COSTS_N_INSNS (22), /* cost of FABS instruction. */
632 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
633 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
634 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
635 DUMMY_STRINGOP_ALGS},
636 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
637 DUMMY_STRINGOP_ALGS},
638 1, /* scalar_stmt_cost. */
639 1, /* scalar load_cost. */
640 1, /* scalar_store_cost. */
641 1, /* vec_stmt_cost. */
642 1, /* vec_to_scalar_cost. */
643 1, /* scalar_to_vec_cost. */
644 1, /* vec_align_load_cost. */
645 2, /* vec_unalign_load_cost. */
646 1, /* vec_store_cost. */
647 3, /* cond_taken_branch_cost. */
648 1, /* cond_not_taken_branch_cost. */
649 };
650
651 static const
652 struct processor_costs i486_cost = { /* 486 specific costs */
653 COSTS_N_INSNS (1), /* cost of an add instruction */
654 COSTS_N_INSNS (1), /* cost of a lea instruction */
655 COSTS_N_INSNS (3), /* variable shift costs */
656 COSTS_N_INSNS (2), /* constant shift costs */
657 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
658 COSTS_N_INSNS (12), /* HI */
659 COSTS_N_INSNS (12), /* SI */
660 COSTS_N_INSNS (12), /* DI */
661 COSTS_N_INSNS (12)}, /* other */
662 1, /* cost of multiply per each bit set */
663 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
664 COSTS_N_INSNS (40), /* HI */
665 COSTS_N_INSNS (40), /* SI */
666 COSTS_N_INSNS (40), /* DI */
667 COSTS_N_INSNS (40)}, /* other */
668 COSTS_N_INSNS (3), /* cost of movsx */
669 COSTS_N_INSNS (2), /* cost of movzx */
670 15, /* "large" insn */
671 3, /* MOVE_RATIO */
672 4, /* cost for loading QImode using movzbl */
673 {2, 4, 2}, /* cost of loading integer registers
674 in QImode, HImode and SImode.
675 Relative to reg-reg move (2). */
676 {2, 4, 2}, /* cost of storing integer registers */
677 2, /* cost of reg,reg fld/fst */
678 {8, 8, 8}, /* cost of loading fp registers
679 in SFmode, DFmode and XFmode */
680 {8, 8, 8}, /* cost of storing fp registers
681 in SFmode, DFmode and XFmode */
682 2, /* cost of moving MMX register */
683 {4, 8}, /* cost of loading MMX registers
684 in SImode and DImode */
685 {4, 8}, /* cost of storing MMX registers
686 in SImode and DImode */
687 2, /* cost of moving SSE register */
688 {4, 8, 16}, /* cost of loading SSE registers
689 in SImode, DImode and TImode */
690 {4, 8, 16}, /* cost of storing SSE registers
691 in SImode, DImode and TImode */
692 3, /* MMX or SSE register to integer */
693 4, /* size of l1 cache. 486 has 8kB cache
694 shared for code and data, so 4kB is
695 not really precise. */
696 4, /* size of l2 cache */
697 0, /* size of prefetch block */
698 0, /* number of parallel prefetches */
699 1, /* Branch cost */
700 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
701 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
702 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
703 COSTS_N_INSNS (3), /* cost of FABS instruction. */
704 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
705 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
706 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
707 DUMMY_STRINGOP_ALGS},
708 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
709 DUMMY_STRINGOP_ALGS},
710 1, /* scalar_stmt_cost. */
711 1, /* scalar load_cost. */
712 1, /* scalar_store_cost. */
713 1, /* vec_stmt_cost. */
714 1, /* vec_to_scalar_cost. */
715 1, /* scalar_to_vec_cost. */
716 1, /* vec_align_load_cost. */
717 2, /* vec_unalign_load_cost. */
718 1, /* vec_store_cost. */
719 3, /* cond_taken_branch_cost. */
720 1, /* cond_not_taken_branch_cost. */
721 };
722
723 static const
724 struct processor_costs pentium_cost = {
725 COSTS_N_INSNS (1), /* cost of an add instruction */
726 COSTS_N_INSNS (1), /* cost of a lea instruction */
727 COSTS_N_INSNS (4), /* variable shift costs */
728 COSTS_N_INSNS (1), /* constant shift costs */
729 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
730 COSTS_N_INSNS (11), /* HI */
731 COSTS_N_INSNS (11), /* SI */
732 COSTS_N_INSNS (11), /* DI */
733 COSTS_N_INSNS (11)}, /* other */
734 0, /* cost of multiply per each bit set */
735 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
736 COSTS_N_INSNS (25), /* HI */
737 COSTS_N_INSNS (25), /* SI */
738 COSTS_N_INSNS (25), /* DI */
739 COSTS_N_INSNS (25)}, /* other */
740 COSTS_N_INSNS (3), /* cost of movsx */
741 COSTS_N_INSNS (2), /* cost of movzx */
742 8, /* "large" insn */
743 6, /* MOVE_RATIO */
744 6, /* cost for loading QImode using movzbl */
745 {2, 4, 2}, /* cost of loading integer registers
746 in QImode, HImode and SImode.
747 Relative to reg-reg move (2). */
748 {2, 4, 2}, /* cost of storing integer registers */
749 2, /* cost of reg,reg fld/fst */
750 {2, 2, 6}, /* cost of loading fp registers
751 in SFmode, DFmode and XFmode */
752 {4, 4, 6}, /* cost of storing fp registers
753 in SFmode, DFmode and XFmode */
754 8, /* cost of moving MMX register */
755 {8, 8}, /* cost of loading MMX registers
756 in SImode and DImode */
757 {8, 8}, /* cost of storing MMX registers
758 in SImode and DImode */
759 2, /* cost of moving SSE register */
760 {4, 8, 16}, /* cost of loading SSE registers
761 in SImode, DImode and TImode */
762 {4, 8, 16}, /* cost of storing SSE registers
763 in SImode, DImode and TImode */
764 3, /* MMX or SSE register to integer */
765 8, /* size of l1 cache. */
766 8, /* size of l2 cache */
767 0, /* size of prefetch block */
768 0, /* number of parallel prefetches */
769 2, /* Branch cost */
770 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
771 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
772 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
773 COSTS_N_INSNS (1), /* cost of FABS instruction. */
774 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
775 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
776 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
777 DUMMY_STRINGOP_ALGS},
778 {{libcall, {{-1, rep_prefix_4_byte}}},
779 DUMMY_STRINGOP_ALGS},
780 1, /* scalar_stmt_cost. */
781 1, /* scalar load_cost. */
782 1, /* scalar_store_cost. */
783 1, /* vec_stmt_cost. */
784 1, /* vec_to_scalar_cost. */
785 1, /* scalar_to_vec_cost. */
786 1, /* vec_align_load_cost. */
787 2, /* vec_unalign_load_cost. */
788 1, /* vec_store_cost. */
789 3, /* cond_taken_branch_cost. */
790 1, /* cond_not_taken_branch_cost. */
791 };
792
793 static const
794 struct processor_costs pentiumpro_cost = {
795 COSTS_N_INSNS (1), /* cost of an add instruction */
796 COSTS_N_INSNS (1), /* cost of a lea instruction */
797 COSTS_N_INSNS (1), /* variable shift costs */
798 COSTS_N_INSNS (1), /* constant shift costs */
799 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
800 COSTS_N_INSNS (4), /* HI */
801 COSTS_N_INSNS (4), /* SI */
802 COSTS_N_INSNS (4), /* DI */
803 COSTS_N_INSNS (4)}, /* other */
804 0, /* cost of multiply per each bit set */
805 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
806 COSTS_N_INSNS (17), /* HI */
807 COSTS_N_INSNS (17), /* SI */
808 COSTS_N_INSNS (17), /* DI */
809 COSTS_N_INSNS (17)}, /* other */
810 COSTS_N_INSNS (1), /* cost of movsx */
811 COSTS_N_INSNS (1), /* cost of movzx */
812 8, /* "large" insn */
813 6, /* MOVE_RATIO */
814 2, /* cost for loading QImode using movzbl */
815 {4, 4, 4}, /* cost of loading integer registers
816 in QImode, HImode and SImode.
817 Relative to reg-reg move (2). */
818 {2, 2, 2}, /* cost of storing integer registers */
819 2, /* cost of reg,reg fld/fst */
820 {2, 2, 6}, /* cost of loading fp registers
821 in SFmode, DFmode and XFmode */
822 {4, 4, 6}, /* cost of storing fp registers
823 in SFmode, DFmode and XFmode */
824 2, /* cost of moving MMX register */
825 {2, 2}, /* cost of loading MMX registers
826 in SImode and DImode */
827 {2, 2}, /* cost of storing MMX registers
828 in SImode and DImode */
829 2, /* cost of moving SSE register */
830 {2, 2, 8}, /* cost of loading SSE registers
831 in SImode, DImode and TImode */
832 {2, 2, 8}, /* cost of storing SSE registers
833 in SImode, DImode and TImode */
834 3, /* MMX or SSE register to integer */
835 8, /* size of l1 cache. */
836 256, /* size of l2 cache */
837 32, /* size of prefetch block */
838 6, /* number of parallel prefetches */
839 2, /* Branch cost */
840 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
841 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
842 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
843 COSTS_N_INSNS (2), /* cost of FABS instruction. */
844 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
845 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
846 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
 847	     (we ensure the alignment).  For small blocks an inline loop is still a
 848	     noticeable win; for bigger blocks either rep movsl or rep movsb is the
 849	     way to go.  Rep movsb apparently has a more expensive startup time in the CPU,
850 but after 4K the difference is down in the noise. */
851 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
852 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
853 DUMMY_STRINGOP_ALGS},
854 {{rep_prefix_4_byte, {{1024, unrolled_loop},
855 {8192, rep_prefix_4_byte}, {-1, libcall}}},
856 DUMMY_STRINGOP_ALGS},
857 1, /* scalar_stmt_cost. */
858 1, /* scalar load_cost. */
859 1, /* scalar_store_cost. */
860 1, /* vec_stmt_cost. */
861 1, /* vec_to_scalar_cost. */
862 1, /* scalar_to_vec_cost. */
863 1, /* vec_align_load_cost. */
864 2, /* vec_unalign_load_cost. */
865 1, /* vec_store_cost. */
866 3, /* cond_taken_branch_cost. */
867 1, /* cond_not_taken_branch_cost. */
868 };
869
870 static const
871 struct processor_costs geode_cost = {
872 COSTS_N_INSNS (1), /* cost of an add instruction */
873 COSTS_N_INSNS (1), /* cost of a lea instruction */
874 COSTS_N_INSNS (2), /* variable shift costs */
875 COSTS_N_INSNS (1), /* constant shift costs */
876 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
877 COSTS_N_INSNS (4), /* HI */
878 COSTS_N_INSNS (7), /* SI */
879 COSTS_N_INSNS (7), /* DI */
880 COSTS_N_INSNS (7)}, /* other */
881 0, /* cost of multiply per each bit set */
882 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
883 COSTS_N_INSNS (23), /* HI */
884 COSTS_N_INSNS (39), /* SI */
885 COSTS_N_INSNS (39), /* DI */
886 COSTS_N_INSNS (39)}, /* other */
887 COSTS_N_INSNS (1), /* cost of movsx */
888 COSTS_N_INSNS (1), /* cost of movzx */
889 8, /* "large" insn */
890 4, /* MOVE_RATIO */
891 1, /* cost for loading QImode using movzbl */
892 {1, 1, 1}, /* cost of loading integer registers
893 in QImode, HImode and SImode.
894 Relative to reg-reg move (2). */
895 {1, 1, 1}, /* cost of storing integer registers */
896 1, /* cost of reg,reg fld/fst */
897 {1, 1, 1}, /* cost of loading fp registers
898 in SFmode, DFmode and XFmode */
899 {4, 6, 6}, /* cost of storing fp registers
900 in SFmode, DFmode and XFmode */
901
902 1, /* cost of moving MMX register */
903 {1, 1}, /* cost of loading MMX registers
904 in SImode and DImode */
905 {1, 1}, /* cost of storing MMX registers
906 in SImode and DImode */
907 1, /* cost of moving SSE register */
908 {1, 1, 1}, /* cost of loading SSE registers
909 in SImode, DImode and TImode */
910 {1, 1, 1}, /* cost of storing SSE registers
911 in SImode, DImode and TImode */
912 1, /* MMX or SSE register to integer */
913 64, /* size of l1 cache. */
914 128, /* size of l2 cache. */
915 32, /* size of prefetch block */
916 1, /* number of parallel prefetches */
917 1, /* Branch cost */
918 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
919 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
920 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
921 COSTS_N_INSNS (1), /* cost of FABS instruction. */
922 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
923 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
924 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
925 DUMMY_STRINGOP_ALGS},
926 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
927 DUMMY_STRINGOP_ALGS},
928 1, /* scalar_stmt_cost. */
929 1, /* scalar load_cost. */
930 1, /* scalar_store_cost. */
931 1, /* vec_stmt_cost. */
932 1, /* vec_to_scalar_cost. */
933 1, /* scalar_to_vec_cost. */
934 1, /* vec_align_load_cost. */
935 2, /* vec_unalign_load_cost. */
936 1, /* vec_store_cost. */
937 3, /* cond_taken_branch_cost. */
938 1, /* cond_not_taken_branch_cost. */
939 };
940
941 static const
942 struct processor_costs k6_cost = {
943 COSTS_N_INSNS (1), /* cost of an add instruction */
944 COSTS_N_INSNS (2), /* cost of a lea instruction */
945 COSTS_N_INSNS (1), /* variable shift costs */
946 COSTS_N_INSNS (1), /* constant shift costs */
947 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
948 COSTS_N_INSNS (3), /* HI */
949 COSTS_N_INSNS (3), /* SI */
950 COSTS_N_INSNS (3), /* DI */
951 COSTS_N_INSNS (3)}, /* other */
952 0, /* cost of multiply per each bit set */
953 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
954 COSTS_N_INSNS (18), /* HI */
955 COSTS_N_INSNS (18), /* SI */
956 COSTS_N_INSNS (18), /* DI */
957 COSTS_N_INSNS (18)}, /* other */
958 COSTS_N_INSNS (2), /* cost of movsx */
959 COSTS_N_INSNS (2), /* cost of movzx */
960 8, /* "large" insn */
961 4, /* MOVE_RATIO */
962 3, /* cost for loading QImode using movzbl */
963 {4, 5, 4}, /* cost of loading integer registers
964 in QImode, HImode and SImode.
965 Relative to reg-reg move (2). */
966 {2, 3, 2}, /* cost of storing integer registers */
967 4, /* cost of reg,reg fld/fst */
968 {6, 6, 6}, /* cost of loading fp registers
969 in SFmode, DFmode and XFmode */
970 {4, 4, 4}, /* cost of storing fp registers
971 in SFmode, DFmode and XFmode */
972 2, /* cost of moving MMX register */
973 {2, 2}, /* cost of loading MMX registers
974 in SImode and DImode */
975 {2, 2}, /* cost of storing MMX registers
976 in SImode and DImode */
977 2, /* cost of moving SSE register */
978 {2, 2, 8}, /* cost of loading SSE registers
979 in SImode, DImode and TImode */
980 {2, 2, 8}, /* cost of storing SSE registers
981 in SImode, DImode and TImode */
982 6, /* MMX or SSE register to integer */
983 32, /* size of l1 cache. */
984 32, /* size of l2 cache. Some models
985 have integrated l2 cache, but
986 optimizing for k6 is not important
987 enough to worry about that. */
988 32, /* size of prefetch block */
989 1, /* number of parallel prefetches */
990 1, /* Branch cost */
991 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
992 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
993 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
994 COSTS_N_INSNS (2), /* cost of FABS instruction. */
995 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
996 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
997 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
998 DUMMY_STRINGOP_ALGS},
999 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1000 DUMMY_STRINGOP_ALGS},
1001 1, /* scalar_stmt_cost. */
1002 1, /* scalar load_cost. */
1003 1, /* scalar_store_cost. */
1004 1, /* vec_stmt_cost. */
1005 1, /* vec_to_scalar_cost. */
1006 1, /* scalar_to_vec_cost. */
1007 1, /* vec_align_load_cost. */
1008 2, /* vec_unalign_load_cost. */
1009 1, /* vec_store_cost. */
1010 3, /* cond_taken_branch_cost. */
1011 1, /* cond_not_taken_branch_cost. */
1012 };
1013
1014 static const
1015 struct processor_costs athlon_cost = {
1016 COSTS_N_INSNS (1), /* cost of an add instruction */
1017 COSTS_N_INSNS (2), /* cost of a lea instruction */
1018 COSTS_N_INSNS (1), /* variable shift costs */
1019 COSTS_N_INSNS (1), /* constant shift costs */
1020 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1021 COSTS_N_INSNS (5), /* HI */
1022 COSTS_N_INSNS (5), /* SI */
1023 COSTS_N_INSNS (5), /* DI */
1024 COSTS_N_INSNS (5)}, /* other */
1025 0, /* cost of multiply per each bit set */
1026 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1027 COSTS_N_INSNS (26), /* HI */
1028 COSTS_N_INSNS (42), /* SI */
1029 COSTS_N_INSNS (74), /* DI */
1030 COSTS_N_INSNS (74)}, /* other */
1031 COSTS_N_INSNS (1), /* cost of movsx */
1032 COSTS_N_INSNS (1), /* cost of movzx */
1033 8, /* "large" insn */
1034 9, /* MOVE_RATIO */
1035 4, /* cost for loading QImode using movzbl */
1036 {3, 4, 3}, /* cost of loading integer registers
1037 in QImode, HImode and SImode.
1038 Relative to reg-reg move (2). */
1039 {3, 4, 3}, /* cost of storing integer registers */
1040 4, /* cost of reg,reg fld/fst */
1041 {4, 4, 12}, /* cost of loading fp registers
1042 in SFmode, DFmode and XFmode */
1043 {6, 6, 8}, /* cost of storing fp registers
1044 in SFmode, DFmode and XFmode */
1045 2, /* cost of moving MMX register */
1046 {4, 4}, /* cost of loading MMX registers
1047 in SImode and DImode */
1048 {4, 4}, /* cost of storing MMX registers
1049 in SImode and DImode */
1050 2, /* cost of moving SSE register */
1051 {4, 4, 6}, /* cost of loading SSE registers
1052 in SImode, DImode and TImode */
1053 {4, 4, 5}, /* cost of storing SSE registers
1054 in SImode, DImode and TImode */
1055 5, /* MMX or SSE register to integer */
1056 64, /* size of l1 cache. */
1057 256, /* size of l2 cache. */
1058 64, /* size of prefetch block */
1059 6, /* number of parallel prefetches */
1060 5, /* Branch cost */
1061 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1062 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1063 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1064 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1065 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1066 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1067	  /* For some reason, Athlon deals better with the REP prefix (relative to loops)
1068 compared to K8. Alignment becomes important after 8 bytes for memcpy and
1069 128 bytes for memset. */
1070 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1071 DUMMY_STRINGOP_ALGS},
1072 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1073 DUMMY_STRINGOP_ALGS},
1074 1, /* scalar_stmt_cost. */
1075 1, /* scalar load_cost. */
1076 1, /* scalar_store_cost. */
1077 1, /* vec_stmt_cost. */
1078 1, /* vec_to_scalar_cost. */
1079 1, /* scalar_to_vec_cost. */
1080 1, /* vec_align_load_cost. */
1081 2, /* vec_unalign_load_cost. */
1082 1, /* vec_store_cost. */
1083 3, /* cond_taken_branch_cost. */
1084 1, /* cond_not_taken_branch_cost. */
1085 };
1086
1087 static const
1088 struct processor_costs k8_cost = {
1089 COSTS_N_INSNS (1), /* cost of an add instruction */
1090 COSTS_N_INSNS (2), /* cost of a lea instruction */
1091 COSTS_N_INSNS (1), /* variable shift costs */
1092 COSTS_N_INSNS (1), /* constant shift costs */
1093 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1094 COSTS_N_INSNS (4), /* HI */
1095 COSTS_N_INSNS (3), /* SI */
1096 COSTS_N_INSNS (4), /* DI */
1097 COSTS_N_INSNS (5)}, /* other */
1098 0, /* cost of multiply per each bit set */
1099 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1100 COSTS_N_INSNS (26), /* HI */
1101 COSTS_N_INSNS (42), /* SI */
1102 COSTS_N_INSNS (74), /* DI */
1103 COSTS_N_INSNS (74)}, /* other */
1104 COSTS_N_INSNS (1), /* cost of movsx */
1105 COSTS_N_INSNS (1), /* cost of movzx */
1106 8, /* "large" insn */
1107 9, /* MOVE_RATIO */
1108 4, /* cost for loading QImode using movzbl */
1109 {3, 4, 3}, /* cost of loading integer registers
1110 in QImode, HImode and SImode.
1111 Relative to reg-reg move (2). */
1112 {3, 4, 3}, /* cost of storing integer registers */
1113 4, /* cost of reg,reg fld/fst */
1114 {4, 4, 12}, /* cost of loading fp registers
1115 in SFmode, DFmode and XFmode */
1116 {6, 6, 8}, /* cost of storing fp registers
1117 in SFmode, DFmode and XFmode */
1118 2, /* cost of moving MMX register */
1119 {3, 3}, /* cost of loading MMX registers
1120 in SImode and DImode */
1121 {4, 4}, /* cost of storing MMX registers
1122 in SImode and DImode */
1123 2, /* cost of moving SSE register */
1124 {4, 3, 6}, /* cost of loading SSE registers
1125 in SImode, DImode and TImode */
1126 {4, 4, 5}, /* cost of storing SSE registers
1127 in SImode, DImode and TImode */
1128 5, /* MMX or SSE register to integer */
1129 64, /* size of l1 cache. */
1130 512, /* size of l2 cache. */
1131 64, /* size of prefetch block */
1132 /* New AMD processors never drop prefetches; if they cannot be performed
1133 immediately, they are queued. We set number of simultaneous prefetches
1134 to a large constant to reflect this (it probably is not a good idea not
1135 to limit number of prefetches at all, as their execution also takes some
1136 time). */
1137 100, /* number of parallel prefetches */
1138 3, /* Branch cost */
1139 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1140 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1141 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1142 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1143 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1144 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1145 /* K8 has optimized REP instruction for medium sized blocks, but for very
1146	     small blocks it is better to use a loop.  For large blocks, libcall can
1147	     do nontemporal accesses and beat inline considerably.  */
1148 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1149 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1150 {{libcall, {{8, loop}, {24, unrolled_loop},
1151 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1152 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1153 4, /* scalar_stmt_cost. */
1154 2, /* scalar load_cost. */
1155 2, /* scalar_store_cost. */
1156 5, /* vec_stmt_cost. */
1157 0, /* vec_to_scalar_cost. */
1158 2, /* scalar_to_vec_cost. */
1159 2, /* vec_align_load_cost. */
1160 3, /* vec_unalign_load_cost. */
1161 3, /* vec_store_cost. */
1162 3, /* cond_taken_branch_cost. */
1163 2, /* cond_not_taken_branch_cost. */
1164 };
1165
1166 struct processor_costs amdfam10_cost = {
1167 COSTS_N_INSNS (1), /* cost of an add instruction */
1168 COSTS_N_INSNS (2), /* cost of a lea instruction */
1169 COSTS_N_INSNS (1), /* variable shift costs */
1170 COSTS_N_INSNS (1), /* constant shift costs */
1171 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1172 COSTS_N_INSNS (4), /* HI */
1173 COSTS_N_INSNS (3), /* SI */
1174 COSTS_N_INSNS (4), /* DI */
1175 COSTS_N_INSNS (5)}, /* other */
1176 0, /* cost of multiply per each bit set */
1177 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1178 COSTS_N_INSNS (35), /* HI */
1179 COSTS_N_INSNS (51), /* SI */
1180 COSTS_N_INSNS (83), /* DI */
1181 COSTS_N_INSNS (83)}, /* other */
1182 COSTS_N_INSNS (1), /* cost of movsx */
1183 COSTS_N_INSNS (1), /* cost of movzx */
1184 8, /* "large" insn */
1185 9, /* MOVE_RATIO */
1186 4, /* cost for loading QImode using movzbl */
1187 {3, 4, 3}, /* cost of loading integer registers
1188 in QImode, HImode and SImode.
1189 Relative to reg-reg move (2). */
1190 {3, 4, 3}, /* cost of storing integer registers */
1191 4, /* cost of reg,reg fld/fst */
1192 {4, 4, 12}, /* cost of loading fp registers
1193 in SFmode, DFmode and XFmode */
1194 {6, 6, 8}, /* cost of storing fp registers
1195 in SFmode, DFmode and XFmode */
1196 2, /* cost of moving MMX register */
1197 {3, 3}, /* cost of loading MMX registers
1198 in SImode and DImode */
1199 {4, 4}, /* cost of storing MMX registers
1200 in SImode and DImode */
1201 2, /* cost of moving SSE register */
1202 {4, 4, 3}, /* cost of loading SSE registers
1203 in SImode, DImode and TImode */
1204 {4, 4, 5}, /* cost of storing SSE registers
1205 in SImode, DImode and TImode */
1206 3, /* MMX or SSE register to integer */
1207 /* On K8:
1208 MOVD reg64, xmmreg Double FSTORE 4
1209 MOVD reg32, xmmreg Double FSTORE 4
1210 On AMDFAM10:
1211 MOVD reg64, xmmreg Double FADD 3
1212 1/1 1/1
1213 MOVD reg32, xmmreg Double FADD 3
1214 1/1 1/1 */
1215 64, /* size of l1 cache. */
1216 512, /* size of l2 cache. */
1217 64, /* size of prefetch block */
1218 /* New AMD processors never drop prefetches; if they cannot be performed
1219 immediately, they are queued. We set number of simultaneous prefetches
1220 to a large constant to reflect this (it probably is not a good idea not
1221 to limit number of prefetches at all, as their execution also takes some
1222 time). */
1223 100, /* number of parallel prefetches */
1224 2, /* Branch cost */
1225 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1226 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1227 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1228 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1229 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1230 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1231
1232 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1233	   very small blocks it is better to use a loop.  For large blocks, libcall can
1234	   do nontemporal accesses and beat inline considerably.  */
1235 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1236 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1237 {{libcall, {{8, loop}, {24, unrolled_loop},
1238 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1239 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1240 4, /* scalar_stmt_cost. */
1241 2, /* scalar load_cost. */
1242 2, /* scalar_store_cost. */
1243 6, /* vec_stmt_cost. */
1244 0, /* vec_to_scalar_cost. */
1245 2, /* scalar_to_vec_cost. */
1246 2, /* vec_align_load_cost. */
1247 2, /* vec_unalign_load_cost. */
1248 2, /* vec_store_cost. */
1249 2, /* cond_taken_branch_cost. */
1250 1, /* cond_not_taken_branch_cost. */
1251 };
1252
1253 struct processor_costs bdver1_cost = {
1254 COSTS_N_INSNS (1), /* cost of an add instruction */
1255 COSTS_N_INSNS (1), /* cost of a lea instruction */
1256 COSTS_N_INSNS (1), /* variable shift costs */
1257 COSTS_N_INSNS (1), /* constant shift costs */
1258 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1259 COSTS_N_INSNS (4), /* HI */
1260 COSTS_N_INSNS (4), /* SI */
1261 COSTS_N_INSNS (6), /* DI */
1262 COSTS_N_INSNS (6)}, /* other */
1263 0, /* cost of multiply per each bit set */
1264 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1265 COSTS_N_INSNS (35), /* HI */
1266 COSTS_N_INSNS (51), /* SI */
1267 COSTS_N_INSNS (83), /* DI */
1268 COSTS_N_INSNS (83)}, /* other */
1269 COSTS_N_INSNS (1), /* cost of movsx */
1270 COSTS_N_INSNS (1), /* cost of movzx */
1271 8, /* "large" insn */
1272 9, /* MOVE_RATIO */
1273 4, /* cost for loading QImode using movzbl */
1274 {5, 5, 4}, /* cost of loading integer registers
1275 in QImode, HImode and SImode.
1276 Relative to reg-reg move (2). */
1277 {4, 4, 4}, /* cost of storing integer registers */
1278 2, /* cost of reg,reg fld/fst */
1279 {5, 5, 12}, /* cost of loading fp registers
1280 in SFmode, DFmode and XFmode */
1281 {4, 4, 8}, /* cost of storing fp registers
1282 in SFmode, DFmode and XFmode */
1283 2, /* cost of moving MMX register */
1284 {4, 4}, /* cost of loading MMX registers
1285 in SImode and DImode */
1286 {4, 4}, /* cost of storing MMX registers
1287 in SImode and DImode */
1288 2, /* cost of moving SSE register */
1289 {4, 4, 4}, /* cost of loading SSE registers
1290 in SImode, DImode and TImode */
1291 {4, 4, 4}, /* cost of storing SSE registers
1292 in SImode, DImode and TImode */
1293 2, /* MMX or SSE register to integer */
1294 /* On K8:
1295 MOVD reg64, xmmreg Double FSTORE 4
1296 MOVD reg32, xmmreg Double FSTORE 4
1297 On AMDFAM10:
1298 MOVD reg64, xmmreg Double FADD 3
1299 1/1 1/1
1300 MOVD reg32, xmmreg Double FADD 3
1301 1/1 1/1 */
1302 16, /* size of l1 cache. */
1303 2048, /* size of l2 cache. */
1304 64, /* size of prefetch block */
1305 /* New AMD processors never drop prefetches; if they cannot be performed
1306 immediately, they are queued. We set number of simultaneous prefetches
1307 to a large constant to reflect this (it probably is not a good idea not
1308 to limit number of prefetches at all, as their execution also takes some
1309 time). */
1310 100, /* number of parallel prefetches */
1311 2, /* Branch cost */
1312 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1313 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1314 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1315 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1316 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1317 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1318
1319 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
1320	     very small blocks it is better to use a loop.  For large blocks, libcall
1321	     can do nontemporal accesses and beat inline considerably.  */
1322 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1323 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1324 {{libcall, {{8, loop}, {24, unrolled_loop},
1325 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1326 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1327 6, /* scalar_stmt_cost. */
1328 4, /* scalar load_cost. */
1329 4, /* scalar_store_cost. */
1330 6, /* vec_stmt_cost. */
1331 0, /* vec_to_scalar_cost. */
1332 2, /* scalar_to_vec_cost. */
1333 4, /* vec_align_load_cost. */
1334 4, /* vec_unalign_load_cost. */
1335 4, /* vec_store_cost. */
1336 2, /* cond_taken_branch_cost. */
1337 1, /* cond_not_taken_branch_cost. */
1338 };
1339
1340 struct processor_costs btver1_cost = {
1341 COSTS_N_INSNS (1), /* cost of an add instruction */
1342 COSTS_N_INSNS (2), /* cost of a lea instruction */
1343 COSTS_N_INSNS (1), /* variable shift costs */
1344 COSTS_N_INSNS (1), /* constant shift costs */
1345 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1346 COSTS_N_INSNS (4), /* HI */
1347 COSTS_N_INSNS (3), /* SI */
1348 COSTS_N_INSNS (4), /* DI */
1349 COSTS_N_INSNS (5)}, /* other */
1350 0, /* cost of multiply per each bit set */
1351 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1352 COSTS_N_INSNS (35), /* HI */
1353 COSTS_N_INSNS (51), /* SI */
1354 COSTS_N_INSNS (83), /* DI */
1355 COSTS_N_INSNS (83)}, /* other */
1356 COSTS_N_INSNS (1), /* cost of movsx */
1357 COSTS_N_INSNS (1), /* cost of movzx */
1358 8, /* "large" insn */
1359 9, /* MOVE_RATIO */
1360 4, /* cost for loading QImode using movzbl */
1361 {3, 4, 3}, /* cost of loading integer registers
1362 in QImode, HImode and SImode.
1363 Relative to reg-reg move (2). */
1364 {3, 4, 3}, /* cost of storing integer registers */
1365 4, /* cost of reg,reg fld/fst */
1366 {4, 4, 12}, /* cost of loading fp registers
1367 in SFmode, DFmode and XFmode */
1368 {6, 6, 8}, /* cost of storing fp registers
1369 in SFmode, DFmode and XFmode */
1370 2, /* cost of moving MMX register */
1371 {3, 3}, /* cost of loading MMX registers
1372 in SImode and DImode */
1373 {4, 4}, /* cost of storing MMX registers
1374 in SImode and DImode */
1375 2, /* cost of moving SSE register */
1376 {4, 4, 3}, /* cost of loading SSE registers
1377 in SImode, DImode and TImode */
1378 {4, 4, 5}, /* cost of storing SSE registers
1379 in SImode, DImode and TImode */
1380 3, /* MMX or SSE register to integer */
1381 /* On K8:
1382 MOVD reg64, xmmreg Double FSTORE 4
1383 MOVD reg32, xmmreg Double FSTORE 4
1384 On AMDFAM10:
1385 MOVD reg64, xmmreg Double FADD 3
1386 1/1 1/1
1387 MOVD reg32, xmmreg Double FADD 3
1388 1/1 1/1 */
1389 32, /* size of l1 cache. */
1390 512, /* size of l2 cache. */
1391 64, /* size of prefetch block */
1392 100, /* number of parallel prefetches */
1393 2, /* Branch cost */
1394 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1395 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1396 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1397 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1398 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1399 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1400
1401 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1402	     very small blocks it is better to use a loop.  For large blocks, libcall can
1403	     do nontemporal accesses and beat inline considerably.  */
1404 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1405 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1406 {{libcall, {{8, loop}, {24, unrolled_loop},
1407 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1408 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1409 4, /* scalar_stmt_cost. */
1410 2, /* scalar load_cost. */
1411 2, /* scalar_store_cost. */
1412 6, /* vec_stmt_cost. */
1413 0, /* vec_to_scalar_cost. */
1414 2, /* scalar_to_vec_cost. */
1415 2, /* vec_align_load_cost. */
1416 2, /* vec_unalign_load_cost. */
1417 2, /* vec_store_cost. */
1418 2, /* cond_taken_branch_cost. */
1419 1, /* cond_not_taken_branch_cost. */
1420 };
1421
1422 static const
1423 struct processor_costs pentium4_cost = {
1424 COSTS_N_INSNS (1), /* cost of an add instruction */
1425 COSTS_N_INSNS (3), /* cost of a lea instruction */
1426 COSTS_N_INSNS (4), /* variable shift costs */
1427 COSTS_N_INSNS (4), /* constant shift costs */
1428 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1429 COSTS_N_INSNS (15), /* HI */
1430 COSTS_N_INSNS (15), /* SI */
1431 COSTS_N_INSNS (15), /* DI */
1432 COSTS_N_INSNS (15)}, /* other */
1433 0, /* cost of multiply per each bit set */
1434 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1435 COSTS_N_INSNS (56), /* HI */
1436 COSTS_N_INSNS (56), /* SI */
1437 COSTS_N_INSNS (56), /* DI */
1438 COSTS_N_INSNS (56)}, /* other */
1439 COSTS_N_INSNS (1), /* cost of movsx */
1440 COSTS_N_INSNS (1), /* cost of movzx */
1441 16, /* "large" insn */
1442 6, /* MOVE_RATIO */
1443 2, /* cost for loading QImode using movzbl */
1444 {4, 5, 4}, /* cost of loading integer registers
1445 in QImode, HImode and SImode.
1446 Relative to reg-reg move (2). */
1447 {2, 3, 2}, /* cost of storing integer registers */
1448 2, /* cost of reg,reg fld/fst */
1449 {2, 2, 6}, /* cost of loading fp registers
1450 in SFmode, DFmode and XFmode */
1451 {4, 4, 6}, /* cost of storing fp registers
1452 in SFmode, DFmode and XFmode */
1453 2, /* cost of moving MMX register */
1454 {2, 2}, /* cost of loading MMX registers
1455 in SImode and DImode */
1456 {2, 2}, /* cost of storing MMX registers
1457 in SImode and DImode */
1458 12, /* cost of moving SSE register */
1459 {12, 12, 12}, /* cost of loading SSE registers
1460 in SImode, DImode and TImode */
1461 {2, 2, 8}, /* cost of storing SSE registers
1462 in SImode, DImode and TImode */
1463 10, /* MMX or SSE register to integer */
1464 8, /* size of l1 cache. */
1465 256, /* size of l2 cache. */
1466 64, /* size of prefetch block */
1467 6, /* number of parallel prefetches */
1468 2, /* Branch cost */
1469 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1470 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1471 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1474 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1475 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1476 DUMMY_STRINGOP_ALGS},
1477 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1478 {-1, libcall}}},
1479 DUMMY_STRINGOP_ALGS},
1480 1, /* scalar_stmt_cost. */
1481 1, /* scalar load_cost. */
1482 1, /* scalar_store_cost. */
1483 1, /* vec_stmt_cost. */
1484 1, /* vec_to_scalar_cost. */
1485 1, /* scalar_to_vec_cost. */
1486 1, /* vec_align_load_cost. */
1487 2, /* vec_unalign_load_cost. */
1488 1, /* vec_store_cost. */
1489 3, /* cond_taken_branch_cost. */
1490 1, /* cond_not_taken_branch_cost. */
1491 };
1492
1493 static const
1494 struct processor_costs nocona_cost = {
1495 COSTS_N_INSNS (1), /* cost of an add instruction */
1496 COSTS_N_INSNS (1), /* cost of a lea instruction */
1497 COSTS_N_INSNS (1), /* variable shift costs */
1498 COSTS_N_INSNS (1), /* constant shift costs */
1499 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1500 COSTS_N_INSNS (10), /* HI */
1501 COSTS_N_INSNS (10), /* SI */
1502 COSTS_N_INSNS (10), /* DI */
1503 COSTS_N_INSNS (10)}, /* other */
1504 0, /* cost of multiply per each bit set */
1505 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1506 COSTS_N_INSNS (66), /* HI */
1507 COSTS_N_INSNS (66), /* SI */
1508 COSTS_N_INSNS (66), /* DI */
1509 COSTS_N_INSNS (66)}, /* other */
1510 COSTS_N_INSNS (1), /* cost of movsx */
1511 COSTS_N_INSNS (1), /* cost of movzx */
1512 16, /* "large" insn */
1513 17, /* MOVE_RATIO */
1514 4, /* cost for loading QImode using movzbl */
1515 {4, 4, 4}, /* cost of loading integer registers
1516 in QImode, HImode and SImode.
1517 Relative to reg-reg move (2). */
1518 {4, 4, 4}, /* cost of storing integer registers */
1519 3, /* cost of reg,reg fld/fst */
1520 {12, 12, 12}, /* cost of loading fp registers
1521 in SFmode, DFmode and XFmode */
1522 {4, 4, 4}, /* cost of storing fp registers
1523 in SFmode, DFmode and XFmode */
1524 6, /* cost of moving MMX register */
1525 {12, 12}, /* cost of loading MMX registers
1526 in SImode and DImode */
1527 {12, 12}, /* cost of storing MMX registers
1528 in SImode and DImode */
1529 6, /* cost of moving SSE register */
1530 {12, 12, 12}, /* cost of loading SSE registers
1531 in SImode, DImode and TImode */
1532 {12, 12, 12}, /* cost of storing SSE registers
1533 in SImode, DImode and TImode */
1534 8, /* MMX or SSE register to integer */
1535 8, /* size of l1 cache. */
1536 1024, /* size of l2 cache. */
1537 128, /* size of prefetch block */
1538 8, /* number of parallel prefetches */
1539 1, /* Branch cost */
1540 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1541 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1542 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1543 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1544 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1545 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1546 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1547 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1548 {100000, unrolled_loop}, {-1, libcall}}}},
1549 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1550 {-1, libcall}}},
1551 {libcall, {{24, loop}, {64, unrolled_loop},
1552 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1553 1, /* scalar_stmt_cost. */
1554 1, /* scalar load_cost. */
1555 1, /* scalar_store_cost. */
1556 1, /* vec_stmt_cost. */
1557 1, /* vec_to_scalar_cost. */
1558 1, /* scalar_to_vec_cost. */
1559 1, /* vec_align_load_cost. */
1560 2, /* vec_unalign_load_cost. */
1561 1, /* vec_store_cost. */
1562 3, /* cond_taken_branch_cost. */
1563 1, /* cond_not_taken_branch_cost. */
1564 };
1565
1566 static const
1567 struct processor_costs atom_cost = {
1568 COSTS_N_INSNS (1), /* cost of an add instruction */
1569 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1570 COSTS_N_INSNS (1), /* variable shift costs */
1571 COSTS_N_INSNS (1), /* constant shift costs */
1572 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1573 COSTS_N_INSNS (4), /* HI */
1574 COSTS_N_INSNS (3), /* SI */
1575 COSTS_N_INSNS (4), /* DI */
1576 COSTS_N_INSNS (2)}, /* other */
1577 0, /* cost of multiply per each bit set */
1578 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1579 COSTS_N_INSNS (26), /* HI */
1580 COSTS_N_INSNS (42), /* SI */
1581 COSTS_N_INSNS (74), /* DI */
1582 COSTS_N_INSNS (74)}, /* other */
1583 COSTS_N_INSNS (1), /* cost of movsx */
1584 COSTS_N_INSNS (1), /* cost of movzx */
1585 8, /* "large" insn */
1586 17, /* MOVE_RATIO */
1587 2, /* cost for loading QImode using movzbl */
1588 {4, 4, 4}, /* cost of loading integer registers
1589 in QImode, HImode and SImode.
1590 Relative to reg-reg move (2). */
1591 {4, 4, 4}, /* cost of storing integer registers */
1592 4, /* cost of reg,reg fld/fst */
1593 {12, 12, 12}, /* cost of loading fp registers
1594 in SFmode, DFmode and XFmode */
1595 {6, 6, 8}, /* cost of storing fp registers
1596 in SFmode, DFmode and XFmode */
1597 2, /* cost of moving MMX register */
1598 {8, 8}, /* cost of loading MMX registers
1599 in SImode and DImode */
1600 {8, 8}, /* cost of storing MMX registers
1601 in SImode and DImode */
1602 2, /* cost of moving SSE register */
1603 {8, 8, 8}, /* cost of loading SSE registers
1604 in SImode, DImode and TImode */
1605 {8, 8, 8}, /* cost of storing SSE registers
1606 in SImode, DImode and TImode */
1607 5, /* MMX or SSE register to integer */
1608 32, /* size of l1 cache. */
1609 256, /* size of l2 cache. */
1610 64, /* size of prefetch block */
1611 6, /* number of parallel prefetches */
1612 3, /* Branch cost */
1613 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1614 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1615 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1616 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1617 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1618 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1619 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1620 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1621 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1622 {{libcall, {{8, loop}, {15, unrolled_loop},
1623 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1624 {libcall, {{24, loop}, {32, unrolled_loop},
1625 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1626 1, /* scalar_stmt_cost. */
1627 1, /* scalar load_cost. */
1628 1, /* scalar_store_cost. */
1629 1, /* vec_stmt_cost. */
1630 1, /* vec_to_scalar_cost. */
1631 1, /* scalar_to_vec_cost. */
1632 1, /* vec_align_load_cost. */
1633 2, /* vec_unalign_load_cost. */
1634 1, /* vec_store_cost. */
1635 3, /* cond_taken_branch_cost. */
1636 1, /* cond_not_taken_branch_cost. */
1637 };
1638
1639 /* Generic64 should produce code tuned for Nocona and K8. */
1640 static const
1641 struct processor_costs generic64_cost = {
1642 COSTS_N_INSNS (1), /* cost of an add instruction */
1643 /* On all chips taken into consideration, lea is 2 cycles or more. With
1644 this cost, however, our current implementation of synth_mult results in
1645 the use of unnecessary temporary registers, causing regressions on several
1646 SPECfp benchmarks. */
1647 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1648 COSTS_N_INSNS (1), /* variable shift costs */
1649 COSTS_N_INSNS (1), /* constant shift costs */
1650 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1651 COSTS_N_INSNS (4), /* HI */
1652 COSTS_N_INSNS (3), /* SI */
1653 COSTS_N_INSNS (4), /* DI */
1654 COSTS_N_INSNS (2)}, /* other */
1655 0, /* cost of multiply per each bit set */
1656 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1657 COSTS_N_INSNS (26), /* HI */
1658 COSTS_N_INSNS (42), /* SI */
1659 COSTS_N_INSNS (74), /* DI */
1660 COSTS_N_INSNS (74)}, /* other */
1661 COSTS_N_INSNS (1), /* cost of movsx */
1662 COSTS_N_INSNS (1), /* cost of movzx */
1663 8, /* "large" insn */
1664 17, /* MOVE_RATIO */
1665 4, /* cost for loading QImode using movzbl */
1666 {4, 4, 4}, /* cost of loading integer registers
1667 in QImode, HImode and SImode.
1668 Relative to reg-reg move (2). */
1669 {4, 4, 4}, /* cost of storing integer registers */
1670 4, /* cost of reg,reg fld/fst */
1671 {12, 12, 12}, /* cost of loading fp registers
1672 in SFmode, DFmode and XFmode */
1673 {6, 6, 8}, /* cost of storing fp registers
1674 in SFmode, DFmode and XFmode */
1675 2, /* cost of moving MMX register */
1676 {8, 8}, /* cost of loading MMX registers
1677 in SImode and DImode */
1678 {8, 8}, /* cost of storing MMX registers
1679 in SImode and DImode */
1680 2, /* cost of moving SSE register */
1681 {8, 8, 8}, /* cost of loading SSE registers
1682 in SImode, DImode and TImode */
1683 {8, 8, 8}, /* cost of storing SSE registers
1684 in SImode, DImode and TImode */
1685 5, /* MMX or SSE register to integer */
1686 32, /* size of l1 cache. */
1687 512, /* size of l2 cache. */
1688 64, /* size of prefetch block */
1689 6, /* number of parallel prefetches */
1690 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1691 value is increased to the perhaps more appropriate value of 5. */
1692 3, /* Branch cost */
1693 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1694 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1695 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1696 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1697 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1698 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1699 {DUMMY_STRINGOP_ALGS,
1700 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1701 {DUMMY_STRINGOP_ALGS,
1702 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1703 1, /* scalar_stmt_cost. */
1704 1, /* scalar load_cost. */
1705 1, /* scalar_store_cost. */
1706 1, /* vec_stmt_cost. */
1707 1, /* vec_to_scalar_cost. */
1708 1, /* scalar_to_vec_cost. */
1709 1, /* vec_align_load_cost. */
1710 2, /* vec_unalign_load_cost. */
1711 1, /* vec_store_cost. */
1712 3, /* cond_taken_branch_cost. */
1713 1, /* cond_not_taken_branch_cost. */
1714 };
1715
1716 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1717 Athlon and K8. */
1718 static const
1719 struct processor_costs generic32_cost = {
1720 COSTS_N_INSNS (1), /* cost of an add instruction */
1721 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1722 COSTS_N_INSNS (1), /* variable shift costs */
1723 COSTS_N_INSNS (1), /* constant shift costs */
1724 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1725 COSTS_N_INSNS (4), /* HI */
1726 COSTS_N_INSNS (3), /* SI */
1727 COSTS_N_INSNS (4), /* DI */
1728 COSTS_N_INSNS (2)}, /* other */
1729 0, /* cost of multiply per each bit set */
1730 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1731 COSTS_N_INSNS (26), /* HI */
1732 COSTS_N_INSNS (42), /* SI */
1733 COSTS_N_INSNS (74), /* DI */
1734 COSTS_N_INSNS (74)}, /* other */
1735 COSTS_N_INSNS (1), /* cost of movsx */
1736 COSTS_N_INSNS (1), /* cost of movzx */
1737 8, /* "large" insn */
1738 17, /* MOVE_RATIO */
1739 4, /* cost for loading QImode using movzbl */
1740 {4, 4, 4}, /* cost of loading integer registers
1741 in QImode, HImode and SImode.
1742 Relative to reg-reg move (2). */
1743 {4, 4, 4}, /* cost of storing integer registers */
1744 4, /* cost of reg,reg fld/fst */
1745 {12, 12, 12}, /* cost of loading fp registers
1746 in SFmode, DFmode and XFmode */
1747 {6, 6, 8}, /* cost of storing fp registers
1748 in SFmode, DFmode and XFmode */
1749 2, /* cost of moving MMX register */
1750 {8, 8}, /* cost of loading MMX registers
1751 in SImode and DImode */
1752 {8, 8}, /* cost of storing MMX registers
1753 in SImode and DImode */
1754 2, /* cost of moving SSE register */
1755 {8, 8, 8}, /* cost of loading SSE registers
1756 in SImode, DImode and TImode */
1757 {8, 8, 8}, /* cost of storing SSE registers
1758 in SImode, DImode and TImode */
1759 5, /* MMX or SSE register to integer */
1760 32, /* size of l1 cache. */
1761 256, /* size of l2 cache. */
1762 64, /* size of prefetch block */
1763 6, /* number of parallel prefetches */
1764 3, /* Branch cost */
1765 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1766 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1767 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1768 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1769 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1770 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1771 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1772 DUMMY_STRINGOP_ALGS},
1773 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1774 DUMMY_STRINGOP_ALGS},
1775 1, /* scalar_stmt_cost. */
1776 1, /* scalar load_cost. */
1777 1, /* scalar_store_cost. */
1778 1, /* vec_stmt_cost. */
1779 1, /* vec_to_scalar_cost. */
1780 1, /* scalar_to_vec_cost. */
1781 1, /* vec_align_load_cost. */
1782 2, /* vec_unalign_load_cost. */
1783 1, /* vec_store_cost. */
1784 3, /* cond_taken_branch_cost. */
1785 1, /* cond_not_taken_branch_cost. */
1786 };
1787
1788 const struct processor_costs *ix86_cost = &pentium_cost;
1789
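/* Illustrative sketch (not part of the original source, compiled out):
   the tables above are expressed in COSTS_N_INSNS units, i.e. multiples of
   the cost of one simple instruction, and later passes read them through
   the ix86_cost pointer.  The field name (fdiv) is assumed from i386.h's
   struct processor_costs, matching the "cost of FDIV instruction" slots
   above.  */
#if 0
static int
example_fdiv_cost (void)
{
  /* For nocona_cost this is COSTS_N_INSNS (40): an FDIV is costed like
     40 simple instructions.  */
  return ix86_cost->fdiv;
}
#endif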
1790 /* Processor feature/optimization bitmasks. */
1791 #define m_386 (1<<PROCESSOR_I386)
1792 #define m_486 (1<<PROCESSOR_I486)
1793 #define m_PENT (1<<PROCESSOR_PENTIUM)
1794 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1795 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1796 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1797 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1798 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1799 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1800 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1801 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1802 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1803 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1804 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1805 #define m_ATOM (1<<PROCESSOR_ATOM)
1806
1807 #define m_GEODE (1<<PROCESSOR_GEODE)
1808 #define m_K6 (1<<PROCESSOR_K6)
1809 #define m_K6_GEODE (m_K6 | m_GEODE)
1810 #define m_K8 (1<<PROCESSOR_K8)
1811 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1812 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1813 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1814 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1815 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1816 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1 | m_BTVER1)
1817
1818 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1819 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1820
1821 /* Generic instruction choice should be the common subset of supported CPUs
1822 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1823 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1824
1825 /* Feature tests against the various tunings. */
1826 unsigned char ix86_tune_features[X86_TUNE_LAST];
1827
1828 /* Feature tests against the various tunings used to create ix86_tune_features
1829 based on the processor mask. */
1830 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1831 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1832 negatively, so enabling it for Generic64 seems like a good code size
1833 tradeoff. We can't enable it for 32bit generic because it does not
1834 work well with PPro based chips. */
1835 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
1836
1837 /* X86_TUNE_PUSH_MEMORY */
1838 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1839 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1840
1841 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1842 m_486 | m_PENT,
1843
1844 /* X86_TUNE_UNROLL_STRLEN */
1845 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1846 | m_CORE2I7 | m_GENERIC,
1847
1848 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1849 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1850 | m_CORE2I7 | m_GENERIC,
1851
1852 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1853 on simulation results. But after P4 was made, no performance benefit
1854 was observed with branch hints; they also increase the code size.
1855 As a result, icc never generates branch hints. */
1856 0,
1857
1858 /* X86_TUNE_DOUBLE_WITH_ADD */
1859 ~m_386,
1860
1861 /* X86_TUNE_USE_SAHF */
1862 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_BTVER1
1863 | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1864
1865 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1866 partial dependencies. */
1867 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1868 | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1869
1870 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1871 register stalls on the Generic32 compilation setting as well. However,
1872 in the current implementation partial register stalls are not eliminated
1873 very well - they can be introduced via subregs synthesized by combine
1874 and can happen in caller/callee saving sequences. Because this option
1875 pays back little on PPro based chips and conflicts with the partial reg
1876 dependencies used by Athlon/P4 based chips, it is better to leave it off
1877 for generic32 for now. */
1878 m_PPRO,
1879
1880 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1881 m_CORE2I7 | m_GENERIC,
1882
1883 /* X86_TUNE_USE_HIMODE_FIOP */
1884 m_386 | m_486 | m_K6_GEODE,
1885
1886 /* X86_TUNE_USE_SIMODE_FIOP */
1887 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
1888
1889 /* X86_TUNE_USE_MOV0 */
1890 m_K6,
1891
1892 /* X86_TUNE_USE_CLTD */
1893 ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
1894
1895 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1896 m_PENT4,
1897
1898 /* X86_TUNE_SPLIT_LONG_MOVES */
1899 m_PPRO,
1900
1901 /* X86_TUNE_READ_MODIFY_WRITE */
1902 ~m_PENT,
1903
1904 /* X86_TUNE_READ_MODIFY */
1905 ~(m_PENT | m_PPRO),
1906
1907 /* X86_TUNE_PROMOTE_QIMODE */
1908 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1909 | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
1910
1911 /* X86_TUNE_FAST_PREFIX */
1912 ~(m_PENT | m_486 | m_386),
1913
1914 /* X86_TUNE_SINGLE_STRINGOP */
1915 m_386 | m_PENT4 | m_NOCONA,
1916
1917 /* X86_TUNE_QIMODE_MATH */
1918 ~0,
1919
1920 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1921 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1922 might be considered for Generic32 if our scheme for avoiding partial
1923 stalls were more effective. */
1924 ~m_PPRO,
1925
1926 /* X86_TUNE_PROMOTE_QI_REGS */
1927 0,
1928
1929 /* X86_TUNE_PROMOTE_HI_REGS */
1930 m_PPRO,
1931
1932 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1933 over esp addition. */
1934 m_386 | m_486 | m_PENT | m_PPRO,
1935
1936 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1937 over esp addition. */
1938 m_PENT,
1939
1940 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1941 over esp subtraction. */
1942 m_386 | m_486 | m_PENT | m_K6_GEODE,
1943
1944 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1945 over esp subtraction. */
1946 m_PENT | m_K6_GEODE,
1947
1948 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1949 for DFmode copies */
1950 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1951 | m_GENERIC | m_GEODE),
1952
1953 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1954 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1955
1956 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1957 conflict here between PPro/Pentium4 based chips that treat 128bit
1958 SSE registers as single units versus K8 based chips that divide SSE
1959 registers into two 64bit halves. This knob promotes all store destinations
1960 to be 128bit to allow register renaming on 128bit SSE units, but usually
1961 results in one extra microop on 64bit SSE units. Experimental results
1962 show that disabling this option on P4 brings an over 20% SPECfp regression,
1963 while enabling it on K8 brings a roughly 2.4% regression that can be partly
1964 masked by careful scheduling of moves. */
1965 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
1966 | m_AMDFAM10 | m_BDVER1,
1967
1968 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1969 m_AMDFAM10 | m_BDVER1 | m_BTVER1 | m_COREI7,
1970
1971 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1972 m_BDVER1 | m_COREI7,
1973
1974 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1975 m_BDVER1,
1976
1977 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1978 are resolved on SSE register parts instead of whole registers, so we may
1979 maintain just the lower part of scalar values in the proper format, leaving
1980 the upper part undefined. */
1981 m_ATHLON_K8,
1982
1983 /* X86_TUNE_SSE_TYPELESS_STORES */
1984 m_AMD_MULTIPLE,
1985
1986 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1987 m_PPRO | m_PENT4 | m_NOCONA,
1988
1989 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1990 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1991
1992 /* X86_TUNE_PROLOGUE_USING_MOVE */
1993 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1994
1995 /* X86_TUNE_EPILOGUE_USING_MOVE */
1996 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1997
1998 /* X86_TUNE_SHIFT1 */
1999 ~m_486,
2000
2001 /* X86_TUNE_USE_FFREEP */
2002 m_AMD_MULTIPLE,
2003
2004 /* X86_TUNE_INTER_UNIT_MOVES */
2005 ~(m_AMD_MULTIPLE | m_GENERIC),
2006
2007 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2008 ~(m_AMDFAM10 | m_BDVER1),
2009
2010 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2011 than 4 branch instructions in the 16 byte window. */
2012 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
2013 | m_GENERIC,
2014
2015 /* X86_TUNE_SCHEDULE */
2016 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
2017 | m_GENERIC,
2018
2019 /* X86_TUNE_USE_BT */
2020 m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
2021
2022 /* X86_TUNE_USE_INCDEC */
2023 ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
2024
2025 /* X86_TUNE_PAD_RETURNS */
2026 m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
2027
2028 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2029 m_ATOM,
2030
2031 /* X86_TUNE_EXT_80387_CONSTANTS */
2032 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
2033 | m_CORE2I7 | m_GENERIC,
2034
2035 /* X86_TUNE_SHORTEN_X87_SSE */
2036 ~m_K8,
2037
2038 /* X86_TUNE_AVOID_VECTOR_DECODE */
2039 m_K8 | m_CORE2I7_64 | m_GENERIC64,
2040
2041 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2042 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
2043 ~(m_386 | m_486),
2044
2045 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2046 vector path on AMD machines. */
2047 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2048
2049 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2050 machines. */
2051 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2052
2053 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2054 than a MOV. */
2055 m_PENT,
2056
2057 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2058 but one byte longer. */
2059 m_PENT,
2060
2061 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2062 operand that cannot be represented using a modRM byte. The XOR
2063 replacement is long decoded, so this split helps here as well. */
2064 m_K6,
2065
2066 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2067 from FP to FP. */
2068 m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
2069
2070 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2071 from integer to FP. */
2072 m_AMDFAM10,
2073
2074 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2075 with a subsequent conditional jump instruction into a single
2076 compare-and-branch uop. */
2077 m_BDVER1,
2078
2079 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2080 will impact LEA instruction selection. */
2081 m_ATOM,
2082
2083 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2084 instructions. */
2085 ~m_ATOM,
2086 };
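/* Illustrative sketch (not part of the original source, compiled out):
   the per-feature processor masks above are collapsed into the boolean
   array ix86_tune_features by testing the bit of the CPU selected with
   -mtune, following the pattern used in ix86_option_override_internal.  */
#if 0
static void
example_init_tune_features (void)
{
  unsigned int tune_mask = 1u << ix86_tune;
  int i;

  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & tune_mask);
}
#endif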
2087
2088 /* Feature tests against the various architecture variations. */
2089 unsigned char ix86_arch_features[X86_ARCH_LAST];
2090
2091 /* Feature tests against the various architecture variations, used to create
2092 ix86_arch_features based on the processor mask. */
2093 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2094 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2095 ~(m_386 | m_486 | m_PENT | m_K6),
2096
2097 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2098 ~m_386,
2099
2100 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2101 ~(m_386 | m_486),
2102
2103 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2104 ~m_386,
2105
2106 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2107 ~m_386,
2108 };
2109
2110 static const unsigned int x86_accumulate_outgoing_args
2111 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
2112 | m_GENERIC;
2113
2114 static const unsigned int x86_arch_always_fancy_math_387
2115 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
2116 | m_NOCONA | m_CORE2I7 | m_GENERIC;
2117
2118 static enum stringop_alg stringop_alg = no_stringop;
2119
2120 /* In case the average insn count for a single function invocation is
2121 lower than this constant, emit fast (but longer) prologue and
2122 epilogue code. */
2123 #define FAST_PROLOGUE_INSN_COUNT 20
2124
2125 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2126 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2127 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2128 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2129
2130 /* Array of the smallest class containing reg number REGNO, indexed by
2131 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2132
2133 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2134 {
2135 /* ax, dx, cx, bx */
2136 AREG, DREG, CREG, BREG,
2137 /* si, di, bp, sp */
2138 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2139 /* FP registers */
2140 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2141 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2142 /* arg pointer */
2143 NON_Q_REGS,
2144 /* flags, fpsr, fpcr, frame */
2145 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2146 /* SSE registers */
2147 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2148 SSE_REGS, SSE_REGS,
2149 /* MMX registers */
2150 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2151 MMX_REGS, MMX_REGS,
2152 /* REX registers */
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2155 /* SSE REX registers */
2156 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2157 SSE_REGS, SSE_REGS,
2158 };
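/* Illustrative sketch (not part of the original source, compiled out):
   REGNO_REG_CLASS in i386.h is a direct lookup in regclass_map, so a query
   for the smallest containing class is a constant-time table read.  The
   helper below is hypothetical.  */
#if 0
static bool
example_regno_is_sse (unsigned int regno)
{
  enum reg_class rclass = regclass_map[regno];
  return rclass == SSE_REGS || rclass == SSE_FIRST_REG;
}
#endif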
2159
2160 /* The "default" register map used in 32bit mode. */
2161
2162 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2163 {
2164 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2165 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2166 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2167 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2168 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2169 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2170 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2171 };
2172
2173 /* The "default" register map used in 64bit mode. */
2174
2175 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2176 {
2177 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2178 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2179 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2180 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2181 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2182 8,9,10,11,12,13,14,15, /* extended integer registers */
2183 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2184 };
2185
2186 /* Define the register numbers to be used in Dwarf debugging information.
2187 The SVR4 reference port C compiler uses the following register numbers
2188 in its Dwarf output code:
2189 0 for %eax (gcc regno = 0)
2190 1 for %ecx (gcc regno = 2)
2191 2 for %edx (gcc regno = 1)
2192 3 for %ebx (gcc regno = 3)
2193 4 for %esp (gcc regno = 7)
2194 5 for %ebp (gcc regno = 6)
2195 6 for %esi (gcc regno = 4)
2196 7 for %edi (gcc regno = 5)
2197 The following three DWARF register numbers are never generated by
2198 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2199 believes these numbers have these meanings.
2200 8 for %eip (no gcc equivalent)
2201 9 for %eflags (gcc regno = 17)
2202 10 for %trapno (no gcc equivalent)
2203 It is not at all clear how we should number the FP stack registers
2204 for the x86 architecture. If the version of SDB on x86/svr4 were
2205 a bit less brain dead with respect to floating-point then we would
2206 have a precedent to follow with respect to DWARF register numbers
2207 for x86 FP registers, but the SDB on x86/svr4 is so completely
2208 broken with respect to FP registers that it is hardly worth thinking
2209 of it as something to strive for compatibility with.
2210 The version of x86/svr4 SDB I have at the moment does (partially)
2211 seem to believe that DWARF register number 11 is associated with
2212 the x86 register %st(0), but that's about all. Higher DWARF
2213 register numbers don't seem to be associated with anything in
2214 particular, and even for DWARF regno 11, SDB only seems to under-
2215 stand that it should say that a variable lives in %st(0) (when
2216 asked via an `=' command) if we said it was in DWARF regno 11,
2217 but SDB still prints garbage when asked for the value of the
2218 variable in question (via a `/' command).
2219 (Also note that the labels SDB prints for various FP stack regs
2220 when doing an `x' command are all wrong.)
2221 Note that these problems generally don't affect the native SVR4
2222 C compiler because it doesn't allow the use of -O with -g and
2223 because when it is *not* optimizing, it allocates a memory
2224 location for each floating-point variable, and the memory
2225 location is what gets described in the DWARF AT_location
2226 attribute for the variable in question.
2227 Regardless of the severe mental illness of the x86/svr4 SDB, we
2228 do something sensible here and we use the following DWARF
2229 register numbers. Note that these are all stack-top-relative
2230 numbers.
2231 11 for %st(0) (gcc regno = 8)
2232 12 for %st(1) (gcc regno = 9)
2233 13 for %st(2) (gcc regno = 10)
2234 14 for %st(3) (gcc regno = 11)
2235 15 for %st(4) (gcc regno = 12)
2236 16 for %st(5) (gcc regno = 13)
2237 17 for %st(6) (gcc regno = 14)
2238 18 for %st(7) (gcc regno = 15)
2239 */
2240 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2241 {
2242 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2243 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2244 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2245 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2246 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2247 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2248 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2249 };
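/* Illustrative sketch (not part of the original source, compiled out):
   debug output selects one of the three maps above depending on the
   target; the exact macro wiring (DBX_REGISTER_NUMBER and friends) lives
   in the target headers, so this helper and its flags are hypothetical.  */
#if 0
static int
example_debug_regno (unsigned int regno, bool is_64bit, bool use_svr4_numbers)
{
  if (use_svr4_numbers)
    return svr4_dbx_register_map[regno];
  return is_64bit ? dbx64_register_map[regno] : dbx_register_map[regno];
}
#endif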
2250
2251 /* Define parameter passing and return registers. */
2252
2253 static int const x86_64_int_parameter_registers[6] =
2254 {
2255 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2256 };
2257
2258 static int const x86_64_ms_abi_int_parameter_registers[4] =
2259 {
2260 CX_REG, DX_REG, R8_REG, R9_REG
2261 };
2262
2263 static int const x86_64_int_return_registers[4] =
2264 {
2265 AX_REG, DX_REG, DI_REG, SI_REG
2266 };
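/* Illustrative sketch (not part of the original source, compiled out):
   the register used for the N-th integer argument depends on the calling
   ABI: six registers for the SysV ABI, four for the MS ABI.  The helper
   name and its shape are hypothetical.  */
#if 0
static int
example_int_arg_regno (int n, enum calling_abi abi)
{
  if (abi == MS_ABI)
    return n < 4 ? x86_64_ms_abi_int_parameter_registers[n] : -1;
  return n < 6 ? x86_64_int_parameter_registers[n] : -1;
}
#endif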
2267
2268 /* Define the structure for the machine field in struct function. */
2269
2270 struct GTY(()) stack_local_entry {
2271 unsigned short mode;
2272 unsigned short n;
2273 rtx rtl;
2274 struct stack_local_entry *next;
2275 };
2276
2277 /* Structure describing stack frame layout.
2278 Stack grows downward:
2279
2280 [arguments]
2281 <- ARG_POINTER
2282 saved pc
2283
2284 saved static chain if ix86_static_chain_on_stack
2285
2286 saved frame pointer if frame_pointer_needed
2287 <- HARD_FRAME_POINTER
2288 [saved regs]
2289 <- regs_save_offset
2290 [padding0]
2291
2292 [saved SSE regs]
2293 <- sse_regs_save_offset
2294 [padding1] |
2295 | <- FRAME_POINTER
2296 [va_arg registers] |
2297 |
2298 [frame] |
2299 |
2300 [padding2] | = to_allocate
2301 <- STACK_POINTER
2302 */
2303 struct ix86_frame
2304 {
2305 int nsseregs;
2306 int nregs;
2307 int va_arg_size;
2308 int red_zone_size;
2309 int outgoing_arguments_size;
2310 HOST_WIDE_INT frame;
2311
2312 /* The offsets relative to ARG_POINTER. */
2313 HOST_WIDE_INT frame_pointer_offset;
2314 HOST_WIDE_INT hard_frame_pointer_offset;
2315 HOST_WIDE_INT stack_pointer_offset;
2316 HOST_WIDE_INT hfp_save_offset;
2317 HOST_WIDE_INT reg_save_offset;
2318 HOST_WIDE_INT sse_reg_save_offset;
2319
2320 /* When save_regs_using_mov is set, emit prologue using
2321 move instead of push instructions. */
2322 bool save_regs_using_mov;
2323 };
2324
2325 /* Code model option. */
2326 enum cmodel ix86_cmodel;
2327 /* Asm dialect. */
2328 enum asm_dialect ix86_asm_dialect = ASM_ATT;
2329 /* TLS dialects. */
2330 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
2331
2332 /* Which unit we are generating floating point math for. */
2333 enum fpmath_unit ix86_fpmath;
2334
2335 /* Which cpu are we scheduling for. */
2336 enum attr_cpu ix86_schedule;
2337
2338 /* Which cpu are we optimizing for. */
2339 enum processor_type ix86_tune;
2340
2341 /* Which instruction set architecture to use. */
2342 enum processor_type ix86_arch;
2343
2344 /* true if sse prefetch instruction is not NOOP. */
2345 int x86_prefetch_sse;
2346
2347 /* ix86_regparm_string as a number */
2348 static int ix86_regparm;
2349
2350 /* -mstackrealign option */
2351 static const char ix86_force_align_arg_pointer_string[]
2352 = "force_align_arg_pointer";
2353
2354 static rtx (*ix86_gen_leave) (void);
2355 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2356 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2357 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2358 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2359 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2360 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2361 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2362 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2363 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2364
2365 /* Preferred alignment for stack boundary in bits. */
2366 unsigned int ix86_preferred_stack_boundary;
2367
2368 /* Alignment for incoming stack boundary in bits specified at
2369 command line. */
2370 static unsigned int ix86_user_incoming_stack_boundary;
2371
2372 /* Default alignment for incoming stack boundary in bits. */
2373 static unsigned int ix86_default_incoming_stack_boundary;
2374
2375 /* Alignment for incoming stack boundary in bits. */
2376 unsigned int ix86_incoming_stack_boundary;
2377
2378 /* The abi used by target. */
2379 enum calling_abi ix86_abi;
2380
2381 /* Values 1-5: see jump.c */
2382 int ix86_branch_cost;
2383
2384 /* Calling abi specific va_list type nodes. */
2385 static GTY(()) tree sysv_va_list_type_node;
2386 static GTY(()) tree ms_va_list_type_node;
2387
2388 /* Variables which are this size or smaller are put in the data/bss
2389 or ldata/lbss sections. */
2390
2391 int ix86_section_threshold = 65536;
2392
2393 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2394 char internal_label_prefix[16];
2395 int internal_label_prefix_len;
2396
2397 /* Fence to use after loop using movnt. */
2398 tree x86_mfence;
2399
2400 /* Register class used for passing a given 64bit part of the argument.
2401 These represent classes as documented by the PS ABI, with the exception
2402 of the SSESF and SSEDF classes, which are basically the SSE class, but
2403 gcc will use SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2404 
2405 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2406 whenever possible (the upper half does contain padding). */
2407 enum x86_64_reg_class
2408 {
2409 X86_64_NO_CLASS,
2410 X86_64_INTEGER_CLASS,
2411 X86_64_INTEGERSI_CLASS,
2412 X86_64_SSE_CLASS,
2413 X86_64_SSESF_CLASS,
2414 X86_64_SSEDF_CLASS,
2415 X86_64_SSEUP_CLASS,
2416 X86_64_X87_CLASS,
2417 X86_64_X87UP_CLASS,
2418 X86_64_COMPLEX_X87_CLASS,
2419 X86_64_MEMORY_CLASS
2420 };
2421
2422 #define MAX_CLASSES 4
2423
2424 /* Table of constants used by fldpi, fldln2, etc.... */
2425 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2426 static bool ext_80387_constants_init = 0;
2427
2428 \f
2429 static struct machine_function * ix86_init_machine_status (void);
2430 static rtx ix86_function_value (const_tree, const_tree, bool);
2431 static bool ix86_function_value_regno_p (const unsigned int);
2432 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2433 const_tree);
2434 static rtx ix86_static_chain (const_tree, bool);
2435 static int ix86_function_regparm (const_tree, const_tree);
2436 static void ix86_compute_frame_layout (struct ix86_frame *);
2437 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2438 rtx, rtx, int);
2439 static void ix86_add_new_builtins (int);
2440 static rtx ix86_expand_vec_perm_builtin (tree);
2441 static tree ix86_canonical_va_list_type (tree);
2442 static void predict_jump (int);
2443 static unsigned int split_stack_prologue_scratch_regno (void);
2444 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2445
2446 enum ix86_function_specific_strings
2447 {
2448 IX86_FUNCTION_SPECIFIC_ARCH,
2449 IX86_FUNCTION_SPECIFIC_TUNE,
2450 IX86_FUNCTION_SPECIFIC_FPMATH,
2451 IX86_FUNCTION_SPECIFIC_MAX
2452 };
2453
2454 static char *ix86_target_string (int, int, const char *, const char *,
2455 const char *, bool);
2456 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2457 static void ix86_function_specific_save (struct cl_target_option *);
2458 static void ix86_function_specific_restore (struct cl_target_option *);
2459 static void ix86_function_specific_print (FILE *, int,
2460 struct cl_target_option *);
2461 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2462 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2463 static bool ix86_can_inline_p (tree, tree);
2464 static void ix86_set_current_function (tree);
2465 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2466
2467 static enum calling_abi ix86_function_abi (const_tree);
2468
2469 \f
2470 #ifndef SUBTARGET32_DEFAULT_CPU
2471 #define SUBTARGET32_DEFAULT_CPU "i386"
2472 #endif
2473
2474 /* The svr4 ABI for the i386 says that records and unions are returned
2475 in memory. */
2476 #ifndef DEFAULT_PCC_STRUCT_RETURN
2477 #define DEFAULT_PCC_STRUCT_RETURN 1
2478 #endif
2479
2480 /* Whether -mtune= or -march= were specified */
2481 static int ix86_tune_defaulted;
2482 static int ix86_arch_specified;
2483
2484 /* For each ISA, define the set of ISAs which become available when
2485 that ISA is enabled. MMX and SSE ISAs are handled separately. */
2486
2487 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2488 #define OPTION_MASK_ISA_3DNOW_SET \
2489 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2490
2491 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2492 #define OPTION_MASK_ISA_SSE2_SET \
2493 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2494 #define OPTION_MASK_ISA_SSE3_SET \
2495 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2496 #define OPTION_MASK_ISA_SSSE3_SET \
2497 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2498 #define OPTION_MASK_ISA_SSE4_1_SET \
2499 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2500 #define OPTION_MASK_ISA_SSE4_2_SET \
2501 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2502 #define OPTION_MASK_ISA_AVX_SET \
2503 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2504 #define OPTION_MASK_ISA_FMA_SET \
2505 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2506
2507 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2508 as -msse4.2. */
2509 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2510
2511 #define OPTION_MASK_ISA_SSE4A_SET \
2512 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2513 #define OPTION_MASK_ISA_FMA4_SET \
2514 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2515 | OPTION_MASK_ISA_AVX_SET)
2516 #define OPTION_MASK_ISA_XOP_SET \
2517 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2518 #define OPTION_MASK_ISA_LWP_SET \
2519 OPTION_MASK_ISA_LWP
2520
2521 /* AES and PCLMUL need SSE2 because they use xmm registers */
2522 #define OPTION_MASK_ISA_AES_SET \
2523 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2524 #define OPTION_MASK_ISA_PCLMUL_SET \
2525 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2526
2527 #define OPTION_MASK_ISA_ABM_SET \
2528 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2529
2530 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
2531 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
2532 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2533 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2534 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2535 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2536 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2537
2538 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2539 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2540 #define OPTION_MASK_ISA_F16C_SET \
2541 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
2542
2543 /* For each ISA, define the set of ISAs which become unavailable when
2544 that ISA is disabled. MMX and SSE ISAs are handled separately. */
2545
2546 #define OPTION_MASK_ISA_MMX_UNSET \
2547 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2548 #define OPTION_MASK_ISA_3DNOW_UNSET \
2549 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2550 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2551
2552 #define OPTION_MASK_ISA_SSE_UNSET \
2553 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2554 #define OPTION_MASK_ISA_SSE2_UNSET \
2555 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2556 #define OPTION_MASK_ISA_SSE3_UNSET \
2557 (OPTION_MASK_ISA_SSE3 \
2558 | OPTION_MASK_ISA_SSSE3_UNSET \
2559 | OPTION_MASK_ISA_SSE4A_UNSET )
2560 #define OPTION_MASK_ISA_SSSE3_UNSET \
2561 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2562 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2563 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2564 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2565 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2566 #define OPTION_MASK_ISA_AVX_UNSET \
2567 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2568 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2569 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2570
2571 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2572 as -mno-sse4.1. */
2573 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2574
2575 #define OPTION_MASK_ISA_SSE4A_UNSET \
2576 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2577
2578 #define OPTION_MASK_ISA_FMA4_UNSET \
2579 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2580 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2581 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2582
2583 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2584 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2585 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2586 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
2587 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
2588 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2589 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2590 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2591 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2592 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2593
2594 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2595 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2596 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
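
/* Illustrative sketch (not part of the original source, compiled out):
   every option handler below follows the same two-line pattern, and the
   SET/UNSET masks encode the implication chains.  Expanding the
   definitions above, for example, -mno-sse3 clears not only SSE3 but also
   SSSE3, SSE4.1, SSE4.2, SSE4A, AVX, FMA, F16C, FMA4 and XOP.  */
#if 0
static void
example_disable_sse3 (struct gcc_options *opts)
{
  opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
}
#endif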
2597
2598 /* Vectorization library interface and handlers. */
2599 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2600
2601 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2602 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2603
2604 /* Processor target table, indexed by processor number */
2605 struct ptt
2606 {
2607 const struct processor_costs *cost; /* Processor costs */
2608 const int align_loop; /* Default alignments. */
2609 const int align_loop_max_skip;
2610 const int align_jump;
2611 const int align_jump_max_skip;
2612 const int align_func;
2613 };
2614
2615 static const struct ptt processor_target_table[PROCESSOR_max] =
2616 {
2617 {&i386_cost, 4, 3, 4, 3, 4},
2618 {&i486_cost, 16, 15, 16, 15, 16},
2619 {&pentium_cost, 16, 7, 16, 7, 16},
2620 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2621 {&geode_cost, 0, 0, 0, 0, 0},
2622 {&k6_cost, 32, 7, 32, 7, 32},
2623 {&athlon_cost, 16, 7, 16, 7, 16},
2624 {&pentium4_cost, 0, 0, 0, 0, 0},
2625 {&k8_cost, 16, 7, 16, 7, 16},
2626 {&nocona_cost, 0, 0, 0, 0, 0},
2627 /* Core 2 32-bit. */
2628 {&generic32_cost, 16, 10, 16, 10, 16},
2629 /* Core 2 64-bit. */
2630 {&generic64_cost, 16, 10, 16, 10, 16},
2631 /* Core i7 32-bit. */
2632 {&generic32_cost, 16, 10, 16, 10, 16},
2633 /* Core i7 64-bit. */
2634 {&generic64_cost, 16, 10, 16, 10, 16},
2635 {&generic32_cost, 16, 7, 16, 7, 16},
2636 {&generic64_cost, 16, 10, 16, 10, 16},
2637 {&amdfam10_cost, 32, 24, 32, 7, 32},
2638 {&bdver1_cost, 32, 24, 32, 7, 32},
2639 {&btver1_cost, 32, 24, 32, 7, 32},
2640 {&atom_cost, 16, 7, 16, 7, 16}
2641 };
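/* Illustrative sketch (not part of the original source, compiled out):
   ix86_option_override_internal later consults this table to pick the
   active cost model and the default alignments for the CPU selected with
   -mtune, roughly along these lines.  */
#if 0
static void
example_apply_tuning_defaults (void)
{
  ix86_cost = processor_target_table[ix86_tune].cost;
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;
}
#endif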
2642
2643 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2644 {
2645 "generic",
2646 "i386",
2647 "i486",
2648 "pentium",
2649 "pentium-mmx",
2650 "pentiumpro",
2651 "pentium2",
2652 "pentium3",
2653 "pentium4",
2654 "pentium-m",
2655 "prescott",
2656 "nocona",
2657 "core2",
2658 "corei7",
2659 "atom",
2660 "geode",
2661 "k6",
2662 "k6-2",
2663 "k6-3",
2664 "athlon",
2665 "athlon-4",
2666 "k8",
2667 "amdfam10",
2668 "bdver1",
2669 "btver1"
2670 };
2671 \f
2672 /* Return true if a red-zone is in use. */
2673
2674 static inline bool
2675 ix86_using_red_zone (void)
2676 {
2677 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2678 }
2679
2680 /* Implement TARGET_HANDLE_OPTION. */
2681
2682 static bool
2683 ix86_handle_option (struct gcc_options *opts,
2684 struct gcc_options *opts_set ATTRIBUTE_UNUSED,
2685 const struct cl_decoded_option *decoded,
2686 location_t loc ATTRIBUTE_UNUSED)
2687 {
2688 size_t code = decoded->opt_index;
2689 int value = decoded->value;
2690
2691 switch (code)
2692 {
2693 case OPT_mmmx:
2694 if (value)
2695 {
2696 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2697 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2698 }
2699 else
2700 {
2701 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2702 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2703 }
2704 return true;
2705
2706 case OPT_m3dnow:
2707 if (value)
2708 {
2709 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2710 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2711 }
2712 else
2713 {
2714 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2715 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2716 }
2717 return true;
2718
2719 case OPT_m3dnowa:
2720 return false;
2721
2722 case OPT_msse:
2723 if (value)
2724 {
2725 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2726 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2727 }
2728 else
2729 {
2730 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2731 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2732 }
2733 return true;
2734
2735 case OPT_msse2:
2736 if (value)
2737 {
2738 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2739 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2740 }
2741 else
2742 {
2743 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2744 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2745 }
2746 return true;
2747
2748 case OPT_msse3:
2749 if (value)
2750 {
2751 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2752 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2753 }
2754 else
2755 {
2756 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2757 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2758 }
2759 return true;
2760
2761 case OPT_mssse3:
2762 if (value)
2763 {
2764 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2765 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2766 }
2767 else
2768 {
2769 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2770 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2771 }
2772 return true;
2773
2774 case OPT_msse4_1:
2775 if (value)
2776 {
2777 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2778 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2779 }
2780 else
2781 {
2782 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2783 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2784 }
2785 return true;
2786
2787 case OPT_msse4_2:
2788 if (value)
2789 {
2790 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2791 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2792 }
2793 else
2794 {
2795 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2796 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2797 }
2798 return true;
2799
2800 case OPT_mavx:
2801 if (value)
2802 {
2803 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2804 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2805 }
2806 else
2807 {
2808 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2809 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2810 }
2811 return true;
2812
2813 case OPT_mfma:
2814 if (value)
2815 {
2816 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2817 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2818 }
2819 else
2820 {
2821 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2822 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2823 }
2824 return true;
2825
2826 case OPT_msse4:
2827 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2828 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2829 return true;
2830
2831 case OPT_mno_sse4:
2832 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2833 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2834 return true;
2835
2836 case OPT_msse4a:
2837 if (value)
2838 {
2839 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2840 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2841 }
2842 else
2843 {
2844 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2845 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2846 }
2847 return true;
2848
2849 case OPT_mfma4:
2850 if (value)
2851 {
2852 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2853 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2854 }
2855 else
2856 {
2857 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2858 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2859 }
2860 return true;
2861
2862 case OPT_mxop:
2863 if (value)
2864 {
2865 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2866 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2867 }
2868 else
2869 {
2870 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2871 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2872 }
2873 return true;
2874
2875 case OPT_mlwp:
2876 if (value)
2877 {
2878 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2879 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2880 }
2881 else
2882 {
2883 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2884 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2885 }
2886 return true;
2887
2888 case OPT_mabm:
2889 if (value)
2890 {
2891 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2892 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2893 }
2894 else
2895 {
2896 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2897 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2898 }
2899 return true;
2900
2901 case OPT_mbmi:
2902 if (value)
2903 {
2904 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
2905 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
2906 }
2907 else
2908 {
2909 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
2910 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
2911 }
2912 return true;
2913
2914 case OPT_mtbm:
2915 if (value)
2916 {
2917 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
2918 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
2919 }
2920 else
2921 {
2922 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
2923 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
2924 }
2925 return true;
2926
2927 case OPT_mpopcnt:
2928 if (value)
2929 {
2930 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2931 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2932 }
2933 else
2934 {
2935 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2936 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2937 }
2938 return true;
2939
2940 case OPT_msahf:
2941 if (value)
2942 {
2943 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2944 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2945 }
2946 else
2947 {
2948 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2949 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2950 }
2951 return true;
2952
2953 case OPT_mcx16:
2954 if (value)
2955 {
2956 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2957 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2958 }
2959 else
2960 {
2961 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2962 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2963 }
2964 return true;
2965
2966 case OPT_mmovbe:
2967 if (value)
2968 {
2969 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2970 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2971 }
2972 else
2973 {
2974 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2975 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2976 }
2977 return true;
2978
2979 case OPT_mcrc32:
2980 if (value)
2981 {
2982 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2983 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2984 }
2985 else
2986 {
2987 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2988 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2989 }
2990 return true;
2991
2992 case OPT_maes:
2993 if (value)
2994 {
2995 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2996 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2997 }
2998 else
2999 {
3000 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
3001 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
3002 }
3003 return true;
3004
3005 case OPT_mpclmul:
3006 if (value)
3007 {
3008 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
3009 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
3010 }
3011 else
3012 {
3013 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
3014 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
3015 }
3016 return true;
3017
3018 case OPT_mfsgsbase:
3019 if (value)
3020 {
3021 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
3022 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
3023 }
3024 else
3025 {
3026 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
3027 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
3028 }
3029 return true;
3030
3031 case OPT_mrdrnd:
3032 if (value)
3033 {
3034 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
3035 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
3036 }
3037 else
3038 {
3039 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
3040 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
3041 }
3042 return true;
3043
3044 case OPT_mf16c:
3045 if (value)
3046 {
3047 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
3048 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
3049 }
3050 else
3051 {
3052 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
3053 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
3054 }
3055 return true;
3056
3057 default:
3058 return true;
3059 }
3060 }
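/* Illustrative sketch (not part of the original source, compiled out):
   options are handled left to right, so after "-msse4 -mno-avx" the
   SSE..SSE4.2 bits set by OPTION_MASK_ISA_SSE4_SET survive, while
   OPTION_MASK_ISA_AVX_UNSET clears only AVX and its dependents (FMA,
   F16C, FMA4, XOP).  */
#if 0
static void
example_check_msse4_mno_avx (void)
{
  gcc_assert ((ix86_isa_flags & OPTION_MASK_ISA_SSE4_2) != 0);
  gcc_assert ((ix86_isa_flags & OPTION_MASK_ISA_AVX) == 0);
}
#endif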
3061 \f
3062 /* Return a string that documents the current -m options. The caller is
3063 responsible for freeing the string. */
3064
3065 static char *
3066 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
3067 const char *fpmath, bool add_nl_p)
3068 {
3069 struct ix86_target_opts
3070 {
3071 const char *option; /* option string */
3072 int mask; /* isa mask options */
3073 };
3074
3075 /* This table is ordered so that options like -msse4.2 that imply
3076 preceding options are matched first. */
3077 static struct ix86_target_opts isa_opts[] =
3078 {
3079 { "-m64", OPTION_MASK_ISA_64BIT },
3080 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3081 { "-mfma", OPTION_MASK_ISA_FMA },
3082 { "-mxop", OPTION_MASK_ISA_XOP },
3083 { "-mlwp", OPTION_MASK_ISA_LWP },
3084 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3085 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3086 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3087 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3088 { "-msse3", OPTION_MASK_ISA_SSE3 },
3089 { "-msse2", OPTION_MASK_ISA_SSE2 },
3090 { "-msse", OPTION_MASK_ISA_SSE },
3091 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3092 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3093 { "-mmmx", OPTION_MASK_ISA_MMX },
3094 { "-mabm", OPTION_MASK_ISA_ABM },
3095 { "-mbmi", OPTION_MASK_ISA_BMI },
3096 { "-mtbm", OPTION_MASK_ISA_TBM },
3097 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3098 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3099 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3100 { "-maes", OPTION_MASK_ISA_AES },
3101 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3102 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3103 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3104 { "-mf16c", OPTION_MASK_ISA_F16C },
3105 };
3106
3107 /* Flag options. */
3108 static struct ix86_target_opts flag_opts[] =
3109 {
3110 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3111 { "-m80387", MASK_80387 },
3112 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3113 { "-malign-double", MASK_ALIGN_DOUBLE },
3114 { "-mcld", MASK_CLD },
3115 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3116 { "-mieee-fp", MASK_IEEE_FP },
3117 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3118 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3119 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3120 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3121 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3122 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3123 { "-mno-red-zone", MASK_NO_RED_ZONE },
3124 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3125 { "-mrecip", MASK_RECIP },
3126 { "-mrtd", MASK_RTD },
3127 { "-msseregparm", MASK_SSEREGPARM },
3128 { "-mstack-arg-probe", MASK_STACK_PROBE },
3129 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3130 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3131 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3132 { "-mvzeroupper", MASK_VZEROUPPER },
3133 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
3134 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
3135 };
3136
3137 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3138
3139 char isa_other[40];
3140 char target_other[40];
3141 unsigned num = 0;
3142 unsigned i, j;
3143 char *ret;
3144 char *ptr;
3145 size_t len;
3146 size_t line_len;
3147 size_t sep_len;
3148
3149 memset (opts, '\0', sizeof (opts));
3150
3151 /* Add -march= option. */
3152 if (arch)
3153 {
3154 opts[num][0] = "-march=";
3155 opts[num++][1] = arch;
3156 }
3157
3158 /* Add -mtune= option. */
3159 if (tune)
3160 {
3161 opts[num][0] = "-mtune=";
3162 opts[num++][1] = tune;
3163 }
3164
3165 /* Pick out the options in isa options. */
3166 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3167 {
3168 if ((isa & isa_opts[i].mask) != 0)
3169 {
3170 opts[num++][0] = isa_opts[i].option;
3171 isa &= ~ isa_opts[i].mask;
3172 }
3173 }
3174
3175 if (isa && add_nl_p)
3176 {
3177 opts[num++][0] = isa_other;
3178 sprintf (isa_other, "(other isa: %#x)", isa);
3179 }
3180
3181 /* Add flag options. */
3182 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3183 {
3184 if ((flags & flag_opts[i].mask) != 0)
3185 {
3186 opts[num++][0] = flag_opts[i].option;
3187 flags &= ~ flag_opts[i].mask;
3188 }
3189 }
3190
3191 if (flags && add_nl_p)
3192 {
3193 opts[num++][0] = target_other;
3194 sprintf (target_other, "(other flags: %#x)", flags);
3195 }
3196
3197 /* Add -fpmath= option. */
3198 if (fpmath)
3199 {
3200 opts[num][0] = "-mfpmath=";
3201 opts[num++][1] = fpmath;
3202 }
3203
3204 /* Any options? */
3205 if (num == 0)
3206 return NULL;
3207
3208 gcc_assert (num < ARRAY_SIZE (opts));
3209
3210 /* Size the string. */
3211 len = 0;
3212 sep_len = (add_nl_p) ? 3 : 1;
3213 for (i = 0; i < num; i++)
3214 {
3215 len += sep_len;
3216 for (j = 0; j < 2; j++)
3217 if (opts[i][j])
3218 len += strlen (opts[i][j]);
3219 }
3220
3221 /* Build the string. */
3222 ret = ptr = (char *) xmalloc (len);
3223 line_len = 0;
3224
3225 for (i = 0; i < num; i++)
3226 {
3227 size_t len2[2];
3228
3229 for (j = 0; j < 2; j++)
3230 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3231
3232 if (i != 0)
3233 {
3234 *ptr++ = ' ';
3235 line_len++;
3236
3237 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3238 {
3239 *ptr++ = '\\';
3240 *ptr++ = '\n';
3241 line_len = 0;
3242 }
3243 }
3244
3245 for (j = 0; j < 2; j++)
3246 if (opts[i][j])
3247 {
3248 memcpy (ptr, opts[i][j], len2[j]);
3249 ptr += len2[j];
3250 line_len += len2[j];
3251 }
3252 }
3253
3254 *ptr = '\0';
3255 gcc_assert (ret + len >= ptr);
3256
3257 return ret;
3258 }
3259
3260 /* Return TRUE if software prefetching is beneficial for the
3261 given CPU. */
3262
3263 static bool
3264 software_prefetching_beneficial_p (void)
3265 {
3266 switch (ix86_tune)
3267 {
3268 case PROCESSOR_GEODE:
3269 case PROCESSOR_K6:
3270 case PROCESSOR_ATHLON:
3271 case PROCESSOR_K8:
3272 case PROCESSOR_AMDFAM10:
3273 case PROCESSOR_BTVER1:
3274 return true;
3275
3276 default:
3277 return false;
3278 }
3279 }
3280
3281 /* Return true if profiling code should be emitted before the
3282 prologue, and false otherwise.
3283 Note: for x86 the "hotfix" case is reported with sorry (). */
3284 static bool
3285 ix86_profile_before_prologue (void)
3286 {
3287 return flag_fentry != 0;
3288 }
3289
3290 /* Function that is callable from the debugger to print the current
3291 options. */
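/* For example, from within gdb: (gdb) call ix86_debug_options ()  */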
3292 void
3293 ix86_debug_options (void)
3294 {
3295 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3296 ix86_arch_string, ix86_tune_string,
3297 ix86_fpmath_string, true);
3298
3299 if (opts)
3300 {
3301 fprintf (stderr, "%s\n\n", opts);
3302 free (opts);
3303 }
3304 else
3305 fputs ("<no options>\n\n", stderr);
3306
3307 return;
3308 }
3309 \f
3310 /* Override various settings based on options. If MAIN_ARGS_P, the
3311 options are from the command line, otherwise they are from
3312 attributes. */
3313
3314 static void
3315 ix86_option_override_internal (bool main_args_p)
3316 {
3317 int i;
3318 unsigned int ix86_arch_mask, ix86_tune_mask;
3319 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3320 const char *prefix;
3321 const char *suffix;
3322 const char *sw;
3323
3324 /* Comes from final.c -- no real reason to change it. */
3325 #define MAX_CODE_ALIGN 16
3326
3327 enum pta_flags
3328 {
3329 PTA_SSE = 1 << 0,
3330 PTA_SSE2 = 1 << 1,
3331 PTA_SSE3 = 1 << 2,
3332 PTA_MMX = 1 << 3,
3333 PTA_PREFETCH_SSE = 1 << 4,
3334 PTA_3DNOW = 1 << 5,
3335 PTA_3DNOW_A = 1 << 6,
3336 PTA_64BIT = 1 << 7,
3337 PTA_SSSE3 = 1 << 8,
3338 PTA_CX16 = 1 << 9,
3339 PTA_POPCNT = 1 << 10,
3340 PTA_ABM = 1 << 11,
3341 PTA_SSE4A = 1 << 12,
3342 PTA_NO_SAHF = 1 << 13,
3343 PTA_SSE4_1 = 1 << 14,
3344 PTA_SSE4_2 = 1 << 15,
3345 PTA_AES = 1 << 16,
3346 PTA_PCLMUL = 1 << 17,
3347 PTA_AVX = 1 << 18,
3348 PTA_FMA = 1 << 19,
3349 PTA_MOVBE = 1 << 20,
3350 PTA_FMA4 = 1 << 21,
3351 PTA_XOP = 1 << 22,
3352 PTA_LWP = 1 << 23,
3353 PTA_FSGSBASE = 1 << 24,
3354 PTA_RDRND = 1 << 25,
3355 PTA_F16C = 1 << 26,
3356 PTA_BMI = 1 << 27,
3357 PTA_TBM = 1 << 28
3358 /* if this reaches 32, need to widen struct pta flags below */
3359 };
3360
3361 static struct pta
3362 {
3363 const char *const name; /* processor name or nickname. */
3364 const enum processor_type processor;
3365 const enum attr_cpu schedule;
3366 const unsigned /*enum pta_flags*/ flags;
3367 }
3368 const processor_alias_table[] =
3369 {
3370 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3371 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3372 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3373 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3374 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3375 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3376 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3377 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3378 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3379 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3380 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3381 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3382 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3383 PTA_MMX | PTA_SSE},
3384 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3385 PTA_MMX | PTA_SSE},
3386 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3387 PTA_MMX | PTA_SSE | PTA_SSE2},
3388 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3389 PTA_MMX |PTA_SSE | PTA_SSE2},
3390 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3391 PTA_MMX | PTA_SSE | PTA_SSE2},
3392 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3393 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3394 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3395 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3396 | PTA_CX16 | PTA_NO_SAHF},
3397 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
3398 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3399 | PTA_SSSE3 | PTA_CX16},
3400 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
3401 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3402 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
3403 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
3404 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3405 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3406 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
3407 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3408 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3409 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3410 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3411 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
3412 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3413 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3414 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3415 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3416 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3417 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3418 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3419 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3420 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3421 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3422 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3423 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3424 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3425 {"x86-64", PROCESSOR_K8, CPU_K8,
3426 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3427 {"k8", PROCESSOR_K8, CPU_K8,
3428 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3429 | PTA_SSE2 | PTA_NO_SAHF},
3430 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3431 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3432 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3433 {"opteron", PROCESSOR_K8, CPU_K8,
3434 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3435 | PTA_SSE2 | PTA_NO_SAHF},
3436 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3437 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3438 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3439 {"athlon64", PROCESSOR_K8, CPU_K8,
3440 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3441 | PTA_SSE2 | PTA_NO_SAHF},
3442 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3443 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3444 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3445 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3446 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3447 | PTA_SSE2 | PTA_NO_SAHF},
3448 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3449 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3450 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3451 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3452 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3453 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3454 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3455 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3456 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3457 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3458 | PTA_XOP | PTA_LWP},
3459 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3460 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3461 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
3462 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3463 0 /* flags are only used for -march switch. */ },
3464 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3465 PTA_64BIT /* flags are only used for -march switch. */ },
3466 };
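/* For example, -march=core2 picks PROCESSOR_CORE2_64 / CPU_CORE2 and, via
   the PTA_* bits of its entry, turns on the MMX, SSE, SSE2, SSE3, SSSE3 and
   CX16 ISA flags in the loop below, unless the user set or cleared those
   ISA options explicitly.  */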
3467
3468 int const pta_size = ARRAY_SIZE (processor_alias_table);
3469
3470 /* Set up prefix/suffix so the error messages refer to either the command
3471 line argument, or the attribute(target). */
3472 if (main_args_p)
3473 {
3474 prefix = "-m";
3475 suffix = "";
3476 sw = "switch";
3477 }
3478 else
3479 {
3480 prefix = "option(\"";
3481 suffix = "\")";
3482 sw = "attribute";
3483 }
3484
3485 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3486 SUBTARGET_OVERRIDE_OPTIONS;
3487 #endif
3488
3489 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3490 SUBSUBTARGET_OVERRIDE_OPTIONS;
3491 #endif
3492
3493 /* -fPIC is the default for x86_64. */
3494 if (TARGET_MACHO && TARGET_64BIT)
3495 flag_pic = 2;
3496
3497 /* Need to check -mtune=generic first. */
3498 if (ix86_tune_string)
3499 {
3500 if (!strcmp (ix86_tune_string, "generic")
3501 || !strcmp (ix86_tune_string, "i686")
3502 /* As special support for cross compilers we read -mtune=native
3503 as -mtune=generic. With native compilers we won't see the
3504 -mtune=native, as it was changed by the driver. */
3505 || !strcmp (ix86_tune_string, "native"))
3506 {
3507 if (TARGET_64BIT)
3508 ix86_tune_string = "generic64";
3509 else
3510 ix86_tune_string = "generic32";
3511 }
3512 /* If this call is for setting the option attribute, allow the
3513 generic32/generic64 that was previously set. */
3514 else if (!main_args_p
3515 && (!strcmp (ix86_tune_string, "generic32")
3516 || !strcmp (ix86_tune_string, "generic64")))
3517 ;
3518 else if (!strncmp (ix86_tune_string, "generic", 7))
3519 error ("bad value (%s) for %stune=%s %s",
3520 ix86_tune_string, prefix, suffix, sw);
3521 else if (!strcmp (ix86_tune_string, "x86-64"))
3522 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3523 "%stune=k8%s or %stune=generic%s instead as appropriate",
3524 prefix, suffix, prefix, suffix, prefix, suffix);
3525 }
3526 else
3527 {
3528 if (ix86_arch_string)
3529 ix86_tune_string = ix86_arch_string;
3530 if (!ix86_tune_string)
3531 {
3532 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3533 ix86_tune_defaulted = 1;
3534 }
3535
3536 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3537 need to use a sensible tune option. */
3538 if (!strcmp (ix86_tune_string, "generic")
3539 || !strcmp (ix86_tune_string, "x86-64")
3540 || !strcmp (ix86_tune_string, "i686"))
3541 {
3542 if (TARGET_64BIT)
3543 ix86_tune_string = "generic64";
3544 else
3545 ix86_tune_string = "generic32";
3546 }
3547 }
3548
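/* Map -mstringop-strategy=<alg> onto the internal stringop_alg value, e.g.
   "rep_8byte" selects rep_prefix_8_byte, which is only accepted in 64-bit
   mode.  */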
3549 if (ix86_stringop_string)
3550 {
3551 if (!strcmp (ix86_stringop_string, "rep_byte"))
3552 stringop_alg = rep_prefix_1_byte;
3553 else if (!strcmp (ix86_stringop_string, "libcall"))
3554 stringop_alg = libcall;
3555 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3556 stringop_alg = rep_prefix_4_byte;
3557 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3558 && TARGET_64BIT)
3559 /* rep; movq isn't available in 32-bit code. */
3560 stringop_alg = rep_prefix_8_byte;
3561 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3562 stringop_alg = loop_1_byte;
3563 else if (!strcmp (ix86_stringop_string, "loop"))
3564 stringop_alg = loop;
3565 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3566 stringop_alg = unrolled_loop;
3567 else
3568 error ("bad value (%s) for %sstringop-strategy=%s %s",
3569 ix86_stringop_string, prefix, suffix, sw);
3570 }
3571
3572 if (!ix86_arch_string)
3573 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3574 else
3575 ix86_arch_specified = 1;
3576
3577 /* Validate -mabi= value. */
3578 if (ix86_abi_string)
3579 {
3580 if (strcmp (ix86_abi_string, "sysv") == 0)
3581 ix86_abi = SYSV_ABI;
3582 else if (strcmp (ix86_abi_string, "ms") == 0)
3583 ix86_abi = MS_ABI;
3584 else
3585 error ("unknown ABI (%s) for %sabi=%s %s",
3586 ix86_abi_string, prefix, suffix, sw);
3587 }
3588 else
3589 ix86_abi = DEFAULT_ABI;
3590
3591 if (ix86_cmodel_string != 0)
3592 {
3593 if (!strcmp (ix86_cmodel_string, "small"))
3594 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3595 else if (!strcmp (ix86_cmodel_string, "medium"))
3596 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3597 else if (!strcmp (ix86_cmodel_string, "large"))
3598 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3599 else if (flag_pic)
3600 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3601 else if (!strcmp (ix86_cmodel_string, "32"))
3602 ix86_cmodel = CM_32;
3603 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3604 ix86_cmodel = CM_KERNEL;
3605 else
3606 error ("bad value (%s) for %scmodel=%s %s",
3607 ix86_cmodel_string, prefix, suffix, sw);
3608 }
3609 else
3610 {
3611 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3612 use of rip-relative addressing. This eliminates fixups that
3613 would otherwise be needed if this object is to be placed in a
3614 DLL, and is essentially just as efficient as direct addressing. */
3615 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3616 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3617 else if (TARGET_64BIT)
3618 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3619 else
3620 ix86_cmodel = CM_32;
3621 }
3622 if (ix86_asm_string != 0)
3623 {
3624 if (! TARGET_MACHO
3625 && !strcmp (ix86_asm_string, "intel"))
3626 ix86_asm_dialect = ASM_INTEL;
3627 else if (!strcmp (ix86_asm_string, "att"))
3628 ix86_asm_dialect = ASM_ATT;
3629 else
3630 error ("bad value (%s) for %sasm=%s %s",
3631 ix86_asm_string, prefix, suffix, sw);
3632 }
3633 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3634 error ("code model %qs not supported in the %s bit mode",
3635 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3636 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3637 sorry ("%i-bit mode not compiled in",
3638 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3639
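/* Apply the ISA defaults implied by -march=.  Each PTA_* bit only turns on
   the corresponding OPTION_MASK_ISA_* bit when the user did not set or
   clear that ISA explicitly (tracked in ix86_isa_flags_explicit).  */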
3640 for (i = 0; i < pta_size; i++)
3641 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3642 {
3643 ix86_schedule = processor_alias_table[i].schedule;
3644 ix86_arch = processor_alias_table[i].processor;
3645 /* Default cpu tuning to the architecture. */
3646 ix86_tune = ix86_arch;
3647
3648 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3649 error ("CPU you selected does not support x86-64 "
3650 "instruction set");
3651
3652 if (processor_alias_table[i].flags & PTA_MMX
3653 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3654 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3655 if (processor_alias_table[i].flags & PTA_3DNOW
3656 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3657 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3658 if (processor_alias_table[i].flags & PTA_3DNOW_A
3659 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3660 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3661 if (processor_alias_table[i].flags & PTA_SSE
3662 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3663 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3664 if (processor_alias_table[i].flags & PTA_SSE2
3665 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3666 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3667 if (processor_alias_table[i].flags & PTA_SSE3
3668 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3669 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3670 if (processor_alias_table[i].flags & PTA_SSSE3
3671 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3672 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3673 if (processor_alias_table[i].flags & PTA_SSE4_1
3674 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3675 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3676 if (processor_alias_table[i].flags & PTA_SSE4_2
3677 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3678 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3679 if (processor_alias_table[i].flags & PTA_AVX
3680 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3681 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3682 if (processor_alias_table[i].flags & PTA_FMA
3683 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3684 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3685 if (processor_alias_table[i].flags & PTA_SSE4A
3686 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3687 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3688 if (processor_alias_table[i].flags & PTA_FMA4
3689 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3690 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3691 if (processor_alias_table[i].flags & PTA_XOP
3692 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3693 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3694 if (processor_alias_table[i].flags & PTA_LWP
3695 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3696 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3697 if (processor_alias_table[i].flags & PTA_ABM
3698 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3699 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3700 if (processor_alias_table[i].flags & PTA_BMI
3701 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3702 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3703 if (processor_alias_table[i].flags & PTA_TBM
3704 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3705 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3706 if (processor_alias_table[i].flags & PTA_CX16
3707 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3708 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3709 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3710 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3711 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3712 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3713 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3714 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3715 if (processor_alias_table[i].flags & PTA_MOVBE
3716 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3717 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3718 if (processor_alias_table[i].flags & PTA_AES
3719 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3720 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3721 if (processor_alias_table[i].flags & PTA_PCLMUL
3722 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3723 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3724 if (processor_alias_table[i].flags & PTA_FSGSBASE
3725 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3726 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3727 if (processor_alias_table[i].flags & PTA_RDRND
3728 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3729 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3730 if (processor_alias_table[i].flags & PTA_F16C
3731 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3732 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3733 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3734 x86_prefetch_sse = true;
3735
3736 break;
3737 }
3738
3739 if (!strcmp (ix86_arch_string, "generic"))
3740 error ("generic CPU can be used only for %stune=%s %s",
3741 prefix, suffix, sw);
3742 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3743 error ("bad value (%s) for %sarch=%s %s",
3744 ix86_arch_string, prefix, suffix, sw);
3745
3746 ix86_arch_mask = 1u << ix86_arch;
3747 for (i = 0; i < X86_ARCH_LAST; ++i)
3748 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3749
3750 for (i = 0; i < pta_size; i++)
3751 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3752 {
3753 ix86_schedule = processor_alias_table[i].schedule;
3754 ix86_tune = processor_alias_table[i].processor;
3755 if (TARGET_64BIT)
3756 {
3757 if (!(processor_alias_table[i].flags & PTA_64BIT))
3758 {
3759 if (ix86_tune_defaulted)
3760 {
3761 ix86_tune_string = "x86-64";
3762 for (i = 0; i < pta_size; i++)
3763 if (! strcmp (ix86_tune_string,
3764 processor_alias_table[i].name))
3765 break;
3766 ix86_schedule = processor_alias_table[i].schedule;
3767 ix86_tune = processor_alias_table[i].processor;
3768 }
3769 else
3770 error ("CPU you selected does not support x86-64 "
3771 "instruction set");
3772 }
3773 }
3774 else
3775 {
3776 /* Adjust tuning when compiling for 32-bit ABI. */
3777 switch (ix86_tune)
3778 {
3779 case PROCESSOR_GENERIC64:
3780 ix86_tune = PROCESSOR_GENERIC32;
3781 ix86_schedule = CPU_PENTIUMPRO;
3782 break;
3783
3784 case PROCESSOR_CORE2_64:
3785 ix86_tune = PROCESSOR_CORE2_32;
3786 break;
3787
3788 case PROCESSOR_COREI7_64:
3789 ix86_tune = PROCESSOR_COREI7_32;
3790 break;
3791
3792 default:
3793 break;
3794 }
3795 }
3796 /* Intel CPUs have always interpreted SSE prefetch instructions as
3797 NOPs; so, we can enable SSE prefetch instructions even when
3798 -mtune (rather than -march) points us to a processor that has them.
3799 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3800 higher processors. */
3801 if (TARGET_CMOVE
3802 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3803 x86_prefetch_sse = true;
3804 break;
3805 }
3806
3807 if (ix86_tune_specified && i == pta_size)
3808 error ("bad value (%s) for %stune=%s %s",
3809 ix86_tune_string, prefix, suffix, sw);
3810
3811 ix86_tune_mask = 1u << ix86_tune;
3812 for (i = 0; i < X86_TUNE_LAST; ++i)
3813 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3814
3815 #ifndef USE_IX86_FRAME_POINTER
3816 #define USE_IX86_FRAME_POINTER 0
3817 #endif
3818
3819 #ifndef USE_X86_64_FRAME_POINTER
3820 #define USE_X86_64_FRAME_POINTER 0
3821 #endif
3822
3823 /* Set the default values for switches whose default depends on TARGET_64BIT
3824 in case they weren't overwritten by command line options. */
3825 if (TARGET_64BIT)
3826 {
3827 if (optimize > 1 && !global_options_set.x_flag_zee)
3828 flag_zee = 1;
3829 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3830 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3831 if (flag_asynchronous_unwind_tables == 2)
3832 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3833 if (flag_pcc_struct_return == 2)
3834 flag_pcc_struct_return = 0;
3835 }
3836 else
3837 {
3838 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3839 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3840 if (flag_asynchronous_unwind_tables == 2)
3841 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3842 if (flag_pcc_struct_return == 2)
3843 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3844 }
3845
3846 if (optimize_size)
3847 ix86_cost = &ix86_size_cost;
3848 else
3849 ix86_cost = processor_target_table[ix86_tune].cost;
3850
3851 /* Arrange to set up i386_stack_locals for all functions. */
3852 init_machine_status = ix86_init_machine_status;
3853
3854 /* Validate -mregparm= value. */
3855 if (ix86_regparm_string)
3856 {
3857 if (TARGET_64BIT)
3858 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3859 i = atoi (ix86_regparm_string);
3860 if (i < 0 || i > REGPARM_MAX)
3861 error ("%sregparm=%d%s is not between 0 and %d",
3862 prefix, i, suffix, REGPARM_MAX);
3863 else
3864 ix86_regparm = i;
3865 }
3866 if (TARGET_64BIT)
3867 ix86_regparm = REGPARM_MAX;
3868
3869 /* If the user has provided any of the -malign-* options,
3870 warn and use that value only if -falign-* is not set.
3871 Remove this code in GCC 3.2 or later. */
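/* The -malign-* values are log2 amounts, e.g. -malign-loops=4 requests
   16-byte alignment since align_loops is set to 1 << 4 below.  */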
3872 if (ix86_align_loops_string)
3873 {
3874 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3875 prefix, suffix, suffix);
3876 if (align_loops == 0)
3877 {
3878 i = atoi (ix86_align_loops_string);
3879 if (i < 0 || i > MAX_CODE_ALIGN)
3880 error ("%salign-loops=%d%s is not between 0 and %d",
3881 prefix, i, suffix, MAX_CODE_ALIGN);
3882 else
3883 align_loops = 1 << i;
3884 }
3885 }
3886
3887 if (ix86_align_jumps_string)
3888 {
3889 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3890 prefix, suffix, suffix);
3891 if (align_jumps == 0)
3892 {
3893 i = atoi (ix86_align_jumps_string);
3894 if (i < 0 || i > MAX_CODE_ALIGN)
3895 error ("%salign-jumps=%d%s is not between 0 and %d",
3896 prefix, i, suffix, MAX_CODE_ALIGN);
3897 else
3898 align_jumps = 1 << i;
3899 }
3900 }
3901
3902 if (ix86_align_funcs_string)
3903 {
3904 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3905 prefix, suffix, suffix);
3906 if (align_functions == 0)
3907 {
3908 i = atoi (ix86_align_funcs_string);
3909 if (i < 0 || i > MAX_CODE_ALIGN)
3910 error ("%salign-functions=%d%s is not between 0 and %d",
3911 prefix, i, suffix, MAX_CODE_ALIGN);
3912 else
3913 align_functions = 1 << i;
3914 }
3915 }
3916
3917 /* Default align_* from the processor table. */
3918 if (align_loops == 0)
3919 {
3920 align_loops = processor_target_table[ix86_tune].align_loop;
3921 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3922 }
3923 if (align_jumps == 0)
3924 {
3925 align_jumps = processor_target_table[ix86_tune].align_jump;
3926 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3927 }
3928 if (align_functions == 0)
3929 {
3930 align_functions = processor_target_table[ix86_tune].align_func;
3931 }
3932
3933 /* Validate -mbranch-cost= value, or provide default. */
3934 ix86_branch_cost = ix86_cost->branch_cost;
3935 if (ix86_branch_cost_string)
3936 {
3937 i = atoi (ix86_branch_cost_string);
3938 if (i < 0 || i > 5)
3939 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3940 else
3941 ix86_branch_cost = i;
3942 }
3943 if (ix86_section_threshold_string)
3944 {
3945 i = atoi (ix86_section_threshold_string);
3946 if (i < 0)
3947 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3948 else
3949 ix86_section_threshold = i;
3950 }
3951
3952 if (ix86_tls_dialect_string)
3953 {
3954 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3955 ix86_tls_dialect = TLS_DIALECT_GNU;
3956 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3957 ix86_tls_dialect = TLS_DIALECT_GNU2;
3958 else
3959 error ("bad value (%s) for %stls-dialect=%s %s",
3960 ix86_tls_dialect_string, prefix, suffix, sw);
3961 }
3962
3963 if (ix87_precision_string)
3964 {
3965 i = atoi (ix87_precision_string);
3966 if (i != 32 && i != 64 && i != 80)
3967 error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
3968 }
3969
3970 if (TARGET_64BIT)
3971 {
3972 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3973
3974 /* Enable by default the SSE and MMX builtins. Do allow the user to
3975 explicitly disable any of these. In particular, disabling SSE and
3976 MMX for kernel code is extremely useful. */
3977 if (!ix86_arch_specified)
3978 ix86_isa_flags
3979 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3980 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3981
3982 if (TARGET_RTD)
3983 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3984 }
3985 else
3986 {
3987 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3988
3989 if (!ix86_arch_specified)
3990 ix86_isa_flags
3991 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3992
3993 /* The i386 ABI does not specify a red zone. It still makes sense to use
3994 one when the programmer takes care to keep the stack from being destroyed. */
3995 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3996 target_flags |= MASK_NO_RED_ZONE;
3997 }
3998
3999 /* Keep nonleaf frame pointers. */
4000 if (flag_omit_frame_pointer)
4001 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4002 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
4003 flag_omit_frame_pointer = 1;
4004
4005 /* If we're doing fast math, we don't care about comparison order
4006 wrt NaNs. This lets us use a shorter comparison sequence. */
4007 if (flag_finite_math_only)
4008 target_flags &= ~MASK_IEEE_FP;
4009
4010 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4011 since the insns won't need emulation. */
4012 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
4013 target_flags &= ~MASK_NO_FANCY_MATH_387;
4014
4015 /* Likewise, if the target doesn't have a 387, or we've specified
4016 software floating point, don't use 387 inline intrinsics. */
4017 if (!TARGET_80387)
4018 target_flags |= MASK_NO_FANCY_MATH_387;
4019
4020 /* Turn on MMX builtins for -msse. */
4021 if (TARGET_SSE)
4022 {
4023 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
4024 x86_prefetch_sse = true;
4025 }
4026
4027 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
4028 if (TARGET_SSE4_2 || TARGET_ABM)
4029 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
4030
4031 /* Validate -mpreferred-stack-boundary= value or default it to
4032 PREFERRED_STACK_BOUNDARY_DEFAULT. */
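/* The value is the log2 of the boundary in bytes, e.g.
   -mpreferred-stack-boundary=4 requests a 16-byte (128-bit) boundary since
   (1 << 4) * BITS_PER_UNIT == 128.  */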
4033 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4034 if (ix86_preferred_stack_boundary_string)
4035 {
4036 int min = (TARGET_64BIT ? 4 : 2);
4037 int max = (TARGET_SEH ? 4 : 12);
4038
4039 i = atoi (ix86_preferred_stack_boundary_string);
4040 if (i < min || i > max)
4041 {
4042 if (min == max)
4043 error ("%spreferred-stack-boundary%s is not supported "
4044 "for this target", prefix, suffix);
4045 else
4046 error ("%spreferred-stack-boundary=%d%s is not between %d and %d",
4047 prefix, i, suffix, min, max);
4048 }
4049 else
4050 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
4051 }
4052
4053 /* Set the default value for -mstackrealign. */
4054 if (ix86_force_align_arg_pointer == -1)
4055 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4056
4057 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4058
4059 /* Validate -mincoming-stack-boundary= value or default it to
4060 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4061 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4062 if (ix86_incoming_stack_boundary_string)
4063 {
4064 i = atoi (ix86_incoming_stack_boundary_string);
4065 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
4066 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4067 i, TARGET_64BIT ? 4 : 2);
4068 else
4069 {
4070 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
4071 ix86_incoming_stack_boundary
4072 = ix86_user_incoming_stack_boundary;
4073 }
4074 }
4075
4076 /* Accept -msseregparm only if at least SSE support is enabled. */
4077 if (TARGET_SSEREGPARM
4078 && ! TARGET_SSE)
4079 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4080
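/* Parse -mfpmath=.  For example "sse,387", "sse+387" and "both" all request
   using both units; if SSE or the 387 is disabled we fall back to whichever
   unit is actually available.  */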
4081 ix86_fpmath = TARGET_FPMATH_DEFAULT;
4082 if (ix86_fpmath_string != 0)
4083 {
4084 if (! strcmp (ix86_fpmath_string, "387"))
4085 ix86_fpmath = FPMATH_387;
4086 else if (! strcmp (ix86_fpmath_string, "sse"))
4087 {
4088 if (!TARGET_SSE)
4089 {
4090 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4091 ix86_fpmath = FPMATH_387;
4092 }
4093 else
4094 ix86_fpmath = FPMATH_SSE;
4095 }
4096 else if (! strcmp (ix86_fpmath_string, "387,sse")
4097 || ! strcmp (ix86_fpmath_string, "387+sse")
4098 || ! strcmp (ix86_fpmath_string, "sse,387")
4099 || ! strcmp (ix86_fpmath_string, "sse+387")
4100 || ! strcmp (ix86_fpmath_string, "both"))
4101 {
4102 if (!TARGET_SSE)
4103 {
4104 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4105 ix86_fpmath = FPMATH_387;
4106 }
4107 else if (!TARGET_80387)
4108 {
4109 warning (0, "387 instruction set disabled, using SSE arithmetics");
4110 ix86_fpmath = FPMATH_SSE;
4111 }
4112 else
4113 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4114 }
4115 else
4116 error ("bad value (%s) for %sfpmath=%s %s",
4117 ix86_fpmath_string, prefix, suffix, sw);
4118 }
4119
4120 /* If the i387 is disabled, then do not return values in it. */
4121 if (!TARGET_80387)
4122 target_flags &= ~MASK_FLOAT_RETURNS;
4123
4124 /* Use external vectorized library in vectorizing intrinsics. */
4125 if (ix86_veclibabi_string)
4126 {
4127 if (strcmp (ix86_veclibabi_string, "svml") == 0)
4128 ix86_veclib_handler = ix86_veclibabi_svml;
4129 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
4130 ix86_veclib_handler = ix86_veclibabi_acml;
4131 else
4132 error ("unknown vectorization library ABI type (%s) for "
4133 "%sveclibabi=%s %s", ix86_veclibabi_string,
4134 prefix, suffix, sw);
4135 }
4136
4137 if ((!USE_IX86_FRAME_POINTER
4138 || (x86_accumulate_outgoing_args & ix86_tune_mask))
4139 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4140 && !optimize_size)
4141 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4142
4143 /* ??? Unwind info is not correct around the CFG unless either a frame
4144 pointer is present or M_A_O_A is set. Fixing this requires rewriting
4145 unwind info generation to be aware of the CFG and propagating states
4146 around edges. */
4147 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
4148 || flag_exceptions || flag_non_call_exceptions)
4149 && flag_omit_frame_pointer
4150 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4151 {
4152 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4153 warning (0, "unwind tables currently require either a frame pointer "
4154 "or %saccumulate-outgoing-args%s for correctness",
4155 prefix, suffix);
4156 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4157 }
4158
4159 /* If stack probes are required, the space used for large function
4160 arguments on the stack must also be probed, so enable
4161 -maccumulate-outgoing-args so this happens in the prologue. */
4162 if (TARGET_STACK_PROBE
4163 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4164 {
4165 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4166 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4167 "for correctness", prefix, suffix);
4168 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4169 }
4170
4171 /* For sane SSE instruction set generation we need the fcomi instruction.
4172 It is safe to enable all CMOVE instructions. */
4173 if (TARGET_SSE)
4174 TARGET_CMOVE = 1;
4175
4176 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4177 {
4178 char *p;
4179 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4180 p = strchr (internal_label_prefix, 'X');
4181 internal_label_prefix_len = p - internal_label_prefix;
4182 *p = '\0';
4183 }
4184
4185 /* When a scheduling description is not available, disable the scheduler
4186 pass so it won't slow down the compilation and make x87 code slower. */
4187 if (!TARGET_SCHEDULE)
4188 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
4189
4190 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4191 ix86_cost->simultaneous_prefetches,
4192 global_options.x_param_values,
4193 global_options_set.x_param_values);
4194 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
4195 global_options.x_param_values,
4196 global_options_set.x_param_values);
4197 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
4198 global_options.x_param_values,
4199 global_options_set.x_param_values);
4200 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
4201 global_options.x_param_values,
4202 global_options_set.x_param_values);
4203
4204 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4205 if (flag_prefetch_loop_arrays < 0
4206 && HAVE_prefetch
4207 && optimize >= 3
4208 && software_prefetching_beneficial_p ())
4209 flag_prefetch_loop_arrays = 1;
4210
4211 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4212 can be optimized to ap = __builtin_next_arg (0). */
4213 if (!TARGET_64BIT && !flag_split_stack)
4214 targetm.expand_builtin_va_start = NULL;
4215
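/* Pick the word-size specific insn generators: DImode variants for 64-bit,
   SImode variants for 32-bit.  */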
4216 if (TARGET_64BIT)
4217 {
4218 ix86_gen_leave = gen_leave_rex64;
4219 ix86_gen_add3 = gen_adddi3;
4220 ix86_gen_sub3 = gen_subdi3;
4221 ix86_gen_sub3_carry = gen_subdi3_carry;
4222 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4223 ix86_gen_monitor = gen_sse3_monitor64;
4224 ix86_gen_andsp = gen_anddi3;
4225 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4226 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4227 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4228 }
4229 else
4230 {
4231 ix86_gen_leave = gen_leave;
4232 ix86_gen_add3 = gen_addsi3;
4233 ix86_gen_sub3 = gen_subsi3;
4234 ix86_gen_sub3_carry = gen_subsi3_carry;
4235 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4236 ix86_gen_monitor = gen_sse3_monitor;
4237 ix86_gen_andsp = gen_andsi3;
4238 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4239 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4240 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4241 }
4242
4243 #ifdef USE_IX86_CLD
4244 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4245 if (!TARGET_64BIT)
4246 target_flags |= MASK_CLD & ~target_flags_explicit;
4247 #endif
4248
4249 if (!TARGET_64BIT && flag_pic)
4250 {
4251 if (flag_fentry > 0)
4252 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4253 "with -fpic");
4254 flag_fentry = 0;
4255 }
4256 else if (TARGET_SEH)
4257 {
4258 if (flag_fentry == 0)
4259 sorry ("-mno-fentry isn%'t compatible with SEH");
4260 flag_fentry = 1;
4261 }
4262 else if (flag_fentry < 0)
4263 {
4264 #if defined(PROFILE_BEFORE_PROLOGUE)
4265 flag_fentry = 1;
4266 #else
4267 flag_fentry = 0;
4268 #endif
4269 }
4270
4271 /* Save the initial options in case the user uses function-specific options. */
4272 if (main_args_p)
4273 target_option_default_node = target_option_current_node
4274 = build_target_option_node ();
4275
4276 if (TARGET_AVX)
4277 {
4278 /* When not optimizing for size, enable the vzeroupper optimization for
4279 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4280 AVX unaligned loads/stores. */
4281 if (!optimize_size)
4282 {
4283 if (flag_expensive_optimizations
4284 && !(target_flags_explicit & MASK_VZEROUPPER))
4285 target_flags |= MASK_VZEROUPPER;
4286 if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4287 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4288 if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4289 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4290 }
4291 }
4292 else
4293 {
4294 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4295 target_flags &= ~MASK_VZEROUPPER;
4296 }
4297 }
4298
4299 /* Return TRUE if VAL is passed in a register with a 256-bit AVX mode. */
4300
4301 static bool
4302 function_pass_avx256_p (const_rtx val)
4303 {
4304 if (!val)
4305 return false;
4306
4307 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
4308 return true;
4309
4310 if (GET_CODE (val) == PARALLEL)
4311 {
4312 int i;
4313 rtx r;
4314
4315 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4316 {
4317 r = XVECEXP (val, 0, i);
4318 if (GET_CODE (r) == EXPR_LIST
4319 && XEXP (r, 0)
4320 && REG_P (XEXP (r, 0))
4321 && (GET_MODE (XEXP (r, 0)) == OImode
4322 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
4323 return true;
4324 }
4325 }
4326
4327 return false;
4328 }
4329
4330 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4331
4332 static void
4333 ix86_option_override (void)
4334 {
4335 ix86_option_override_internal (true);
4336 }
4337
4338 /* Update register usage after having seen the compiler flags. */
4339
4340 static void
4341 ix86_conditional_register_usage (void)
4342 {
4343 int i;
4344 unsigned int j;
4345
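/* Table entries greater than 1 encode a mode-dependent choice: an entry of
   2 makes the register fixed/call-used only for 32-bit, an entry of 3 only
   for 64-bit, as resolved here.  */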
4346 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4347 {
4348 if (fixed_regs[i] > 1)
4349 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
4350 if (call_used_regs[i] > 1)
4351 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
4352 }
4353
4354 /* The PIC register, if it exists, is fixed. */
4355 j = PIC_OFFSET_TABLE_REGNUM;
4356 if (j != INVALID_REGNUM)
4357 fixed_regs[j] = call_used_regs[j] = 1;
4358
4359 /* The 64-bit MS_ABI changes the set of call-used registers. */
4360 if (TARGET_64BIT_MS_ABI)
4361 {
4362 call_used_regs[SI_REG] = 0;
4363 call_used_regs[DI_REG] = 0;
4364 call_used_regs[XMM6_REG] = 0;
4365 call_used_regs[XMM7_REG] = 0;
4366 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4367 call_used_regs[i] = 0;
4368 }
4369
4370 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4371 other call-clobbered regs for 64-bit. */
4372 if (TARGET_64BIT)
4373 {
4374 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4375
4376 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4377 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4378 && call_used_regs[i])
4379 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4380 }
4381
4382 /* If MMX is disabled, squash the registers. */
4383 if (! TARGET_MMX)
4384 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4385 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4386 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4387
4388 /* If SSE is disabled, squash the registers. */
4389 if (! TARGET_SSE)
4390 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4391 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4392 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4393
4394 /* If the FPU is disabled, squash the registers. */
4395 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4396 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4397 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4398 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4399
4400 /* If 32-bit, squash the 64-bit registers. */
4401 if (! TARGET_64BIT)
4402 {
4403 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4404 reg_names[i] = "";
4405 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4406 reg_names[i] = "";
4407 }
4408 }
4409
4410 \f
4411 /* Save the current options */
4412
4413 static void
4414 ix86_function_specific_save (struct cl_target_option *ptr)
4415 {
4416 ptr->arch = ix86_arch;
4417 ptr->schedule = ix86_schedule;
4418 ptr->tune = ix86_tune;
4419 ptr->fpmath = ix86_fpmath;
4420 ptr->branch_cost = ix86_branch_cost;
4421 ptr->tune_defaulted = ix86_tune_defaulted;
4422 ptr->arch_specified = ix86_arch_specified;
4423 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4424 ptr->ix86_target_flags_explicit = target_flags_explicit;
4425
4426 /* The fields are char but the variables are not; make sure the
4427 values fit in the fields. */
4428 gcc_assert (ptr->arch == ix86_arch);
4429 gcc_assert (ptr->schedule == ix86_schedule);
4430 gcc_assert (ptr->tune == ix86_tune);
4431 gcc_assert (ptr->fpmath == ix86_fpmath);
4432 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4433 }
4434
4435 /* Restore the current options */
4436
4437 static void
4438 ix86_function_specific_restore (struct cl_target_option *ptr)
4439 {
4440 enum processor_type old_tune = ix86_tune;
4441 enum processor_type old_arch = ix86_arch;
4442 unsigned int ix86_arch_mask, ix86_tune_mask;
4443 int i;
4444
4445 ix86_arch = (enum processor_type) ptr->arch;
4446 ix86_schedule = (enum attr_cpu) ptr->schedule;
4447 ix86_tune = (enum processor_type) ptr->tune;
4448 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
4449 ix86_branch_cost = ptr->branch_cost;
4450 ix86_tune_defaulted = ptr->tune_defaulted;
4451 ix86_arch_specified = ptr->arch_specified;
4452 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4453 target_flags_explicit = ptr->ix86_target_flags_explicit;
4454
4455 /* Recreate the arch feature tests if the arch changed */
4456 if (old_arch != ix86_arch)
4457 {
4458 ix86_arch_mask = 1u << ix86_arch;
4459 for (i = 0; i < X86_ARCH_LAST; ++i)
4460 ix86_arch_features[i]
4461 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4462 }
4463
4464 /* Recreate the tune optimization tests */
4465 if (old_tune != ix86_tune)
4466 {
4467 ix86_tune_mask = 1u << ix86_tune;
4468 for (i = 0; i < X86_TUNE_LAST; ++i)
4469 ix86_tune_features[i]
4470 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4471 }
4472 }
4473
4474 /* Print the current options */
4475
4476 static void
4477 ix86_function_specific_print (FILE *file, int indent,
4478 struct cl_target_option *ptr)
4479 {
4480 char *target_string
4481 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4482 NULL, NULL, NULL, false);
4483
4484 fprintf (file, "%*sarch = %d (%s)\n",
4485 indent, "",
4486 ptr->arch,
4487 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4488 ? cpu_names[ptr->arch]
4489 : "<unknown>"));
4490
4491 fprintf (file, "%*stune = %d (%s)\n",
4492 indent, "",
4493 ptr->tune,
4494 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4495 ? cpu_names[ptr->tune]
4496 : "<unknown>"));
4497
4498 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
4499 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
4500 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
4501 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4502
4503 if (target_string)
4504 {
4505 fprintf (file, "%*s%s\n", indent, "", target_string);
4506 free (target_string);
4507 }
4508 }
4509
4510 \f
4511 /* Inner function to process the attribute((target(...))): take an argument and
4512 set the current options from the argument. If we have a list, recursively go
4513 over the list. */
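/* For example, attribute((target("sse4.2,arch=core2"))) enables the SSE4.2
   ISA bits via ix86_handle_option and records "core2" as the arch= string
   for the later rerun of ix86_option_override_internal; a "no-" prefix
   (e.g. "no-sse4.2") clears the option instead.  */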
4514
4515 static bool
4516 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
4517 {
4518 char *next_optstr;
4519 bool ret = true;
4520
4521 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4522 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4523 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4524 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4525
4526 enum ix86_opt_type
4527 {
4528 ix86_opt_unknown,
4529 ix86_opt_yes,
4530 ix86_opt_no,
4531 ix86_opt_str,
4532 ix86_opt_isa
4533 };
4534
4535 static const struct
4536 {
4537 const char *string;
4538 size_t len;
4539 enum ix86_opt_type type;
4540 int opt;
4541 int mask;
4542 } attrs[] = {
4543 /* isa options */
4544 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4545 IX86_ATTR_ISA ("abm", OPT_mabm),
4546 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4547 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4548 IX86_ATTR_ISA ("aes", OPT_maes),
4549 IX86_ATTR_ISA ("avx", OPT_mavx),
4550 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4551 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4552 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4553 IX86_ATTR_ISA ("sse", OPT_msse),
4554 IX86_ATTR_ISA ("sse2", OPT_msse2),
4555 IX86_ATTR_ISA ("sse3", OPT_msse3),
4556 IX86_ATTR_ISA ("sse4", OPT_msse4),
4557 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4558 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4559 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4560 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4561 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4562 IX86_ATTR_ISA ("xop", OPT_mxop),
4563 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4564 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4565 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4566 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4567
4568 /* string options */
4569 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4570 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
4571 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4572
4573 /* flag options */
4574 IX86_ATTR_YES ("cld",
4575 OPT_mcld,
4576 MASK_CLD),
4577
4578 IX86_ATTR_NO ("fancy-math-387",
4579 OPT_mfancy_math_387,
4580 MASK_NO_FANCY_MATH_387),
4581
4582 IX86_ATTR_YES ("ieee-fp",
4583 OPT_mieee_fp,
4584 MASK_IEEE_FP),
4585
4586 IX86_ATTR_YES ("inline-all-stringops",
4587 OPT_minline_all_stringops,
4588 MASK_INLINE_ALL_STRINGOPS),
4589
4590 IX86_ATTR_YES ("inline-stringops-dynamically",
4591 OPT_minline_stringops_dynamically,
4592 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4593
4594 IX86_ATTR_NO ("align-stringops",
4595 OPT_mno_align_stringops,
4596 MASK_NO_ALIGN_STRINGOPS),
4597
4598 IX86_ATTR_YES ("recip",
4599 OPT_mrecip,
4600 MASK_RECIP),
4601
4602 };
4603
4604 /* If this is a list, recurse to get the options. */
4605 if (TREE_CODE (args) == TREE_LIST)
4606 {
4607 bool ret = true;
4608
4609 for (; args; args = TREE_CHAIN (args))
4610 if (TREE_VALUE (args)
4611 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
4612 ret = false;
4613
4614 return ret;
4615 }
4616
4617 else if (TREE_CODE (args) != STRING_CST)
4618 gcc_unreachable ();
4619
4620 /* Handle multiple arguments separated by commas. */
4621 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4622
4623 while (next_optstr && *next_optstr != '\0')
4624 {
4625 char *p = next_optstr;
4626 char *orig_p = p;
4627 char *comma = strchr (next_optstr, ',');
4628 const char *opt_string;
4629 size_t len, opt_len;
4630 int opt;
4631 bool opt_set_p;
4632 char ch;
4633 unsigned i;
4634 enum ix86_opt_type type = ix86_opt_unknown;
4635 int mask = 0;
4636
4637 if (comma)
4638 {
4639 *comma = '\0';
4640 len = comma - next_optstr;
4641 next_optstr = comma + 1;
4642 }
4643 else
4644 {
4645 len = strlen (p);
4646 next_optstr = NULL;
4647 }
4648
4649 /* Recognize no-xxx. */
4650 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4651 {
4652 opt_set_p = false;
4653 p += 3;
4654 len -= 3;
4655 }
4656 else
4657 opt_set_p = true;
4658
4659 /* Find the option. */
4660 ch = *p;
4661 opt = N_OPTS;
4662 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4663 {
4664 type = attrs[i].type;
4665 opt_len = attrs[i].len;
4666 if (ch == attrs[i].string[0]
4667 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4668 && memcmp (p, attrs[i].string, opt_len) == 0)
4669 {
4670 opt = attrs[i].opt;
4671 mask = attrs[i].mask;
4672 opt_string = attrs[i].string;
4673 break;
4674 }
4675 }
4676
4677 /* Process the option. */
4678 if (opt == N_OPTS)
4679 {
4680 error ("attribute(target(\"%s\")) is unknown", orig_p);
4681 ret = false;
4682 }
4683
4684 else if (type == ix86_opt_isa)
4685 {
4686 struct cl_decoded_option decoded;
4687
4688 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4689 ix86_handle_option (&global_options, &global_options_set,
4690 &decoded, input_location);
4691 }
4692
4693 else if (type == ix86_opt_yes || type == ix86_opt_no)
4694 {
4695 if (type == ix86_opt_no)
4696 opt_set_p = !opt_set_p;
4697
4698 if (opt_set_p)
4699 target_flags |= mask;
4700 else
4701 target_flags &= ~mask;
4702 }
4703
4704 else if (type == ix86_opt_str)
4705 {
4706 if (p_strings[opt])
4707 {
4708 error ("option(\"%s\") was already specified", opt_string);
4709 ret = false;
4710 }
4711 else
4712 p_strings[opt] = xstrdup (p + opt_len);
4713 }
4714
4715 else
4716 gcc_unreachable ();
4717 }
4718
4719 return ret;
4720 }
4721
4722 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4723
4724 tree
4725 ix86_valid_target_attribute_tree (tree args)
4726 {
4727 const char *orig_arch_string = ix86_arch_string;
4728 const char *orig_tune_string = ix86_tune_string;
4729 const char *orig_fpmath_string = ix86_fpmath_string;
4730 int orig_tune_defaulted = ix86_tune_defaulted;
4731 int orig_arch_specified = ix86_arch_specified;
4732 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4733 tree t = NULL_TREE;
4734 int i;
4735 struct cl_target_option *def
4736 = TREE_TARGET_OPTION (target_option_default_node);
4737
4738 /* Process each of the options on the chain. */
4739 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4740 return NULL_TREE;
4741
4742 /* If the changed options are different from the default, rerun
4743 ix86_option_override_internal, and then save the options away.
4744 The string options are attribute options, and will be undone
4745 when we copy the save structure. */
4746 if (ix86_isa_flags != def->x_ix86_isa_flags
4747 || target_flags != def->x_target_flags
4748 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4749 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4750 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4751 {
4752 /* If we are using the default tune= or arch=, undo the string assigned,
4753 and use the default. */
4754 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4755 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4756 else if (!orig_arch_specified)
4757 ix86_arch_string = NULL;
4758
4759 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4760 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4761 else if (orig_tune_defaulted)
4762 ix86_tune_string = NULL;
4763
4764 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4765 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4766 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4767 else if (!TARGET_64BIT && TARGET_SSE)
4768 ix86_fpmath_string = "sse,387";
4769
4770 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4771 ix86_option_override_internal (false);
4772
4773 /* Add any builtin functions with the new isa if any. */
4774 ix86_add_new_builtins (ix86_isa_flags);
4775
4776 /* Save the current options unless we are validating options for
4777 #pragma. */
4778 t = build_target_option_node ();
4779
4780 ix86_arch_string = orig_arch_string;
4781 ix86_tune_string = orig_tune_string;
4782 ix86_fpmath_string = orig_fpmath_string;
4783
4784 /* Free up memory allocated to hold the strings */
4785 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4786 free (option_strings[i]);
4787 }
4788
4789 return t;
4790 }
4791
4792 /* Hook to validate attribute((target("string"))). */
4793
4794 static bool
4795 ix86_valid_target_attribute_p (tree fndecl,
4796 tree ARG_UNUSED (name),
4797 tree args,
4798 int ARG_UNUSED (flags))
4799 {
4800 struct cl_target_option cur_target;
4801 bool ret = true;
4802 tree old_optimize = build_optimization_node ();
4803 tree new_target, new_optimize;
4804 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4805
4806 /* If the function changed the optimization levels as well as setting target
4807 options, start with the optimizations specified. */
4808 if (func_optimize && func_optimize != old_optimize)
4809 cl_optimization_restore (&global_options,
4810 TREE_OPTIMIZATION (func_optimize));
4811
4812 /* The target attributes may also change some optimization flags, so update
4813 the optimization options if necessary. */
4814 cl_target_option_save (&cur_target, &global_options);
4815 new_target = ix86_valid_target_attribute_tree (args);
4816 new_optimize = build_optimization_node ();
4817
4818 if (!new_target)
4819 ret = false;
4820
4821 else if (fndecl)
4822 {
4823 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4824
4825 if (old_optimize != new_optimize)
4826 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4827 }
4828
4829 cl_target_option_restore (&global_options, &cur_target);
4830
4831 if (old_optimize != new_optimize)
4832 cl_optimization_restore (&global_options,
4833 TREE_OPTIMIZATION (old_optimize));
4834
4835 return ret;
4836 }
4837
4838 \f
4839 /* Hook to determine if one function can safely inline another. */
4840
4841 static bool
4842 ix86_can_inline_p (tree caller, tree callee)
4843 {
4844 bool ret = false;
4845 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4846 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4847
4848 /* If callee has no option attributes, then it is ok to inline. */
4849 if (!callee_tree)
4850 ret = true;
4851
4852 /* If caller has no option attributes, but callee does then it is not ok to
4853 inline. */
4854 else if (!caller_tree)
4855 ret = false;
4856
4857 else
4858 {
4859 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4860 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4861
4862 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4863 function can inline an SSE2 function but an SSE2 function can't inline
4864 an SSE4 function. */
4865 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4866 != callee_opts->x_ix86_isa_flags)
4867 ret = false;
4868
4869 /* See if we have the same non-isa options. */
4870 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4871 ret = false;
4872
4873 /* See if arch, tune, etc. are the same. */
4874 else if (caller_opts->arch != callee_opts->arch)
4875 ret = false;
4876
4877 else if (caller_opts->tune != callee_opts->tune)
4878 ret = false;
4879
4880 else if (caller_opts->fpmath != callee_opts->fpmath)
4881 ret = false;
4882
4883 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4884 ret = false;
4885
4886 else
4887 ret = true;
4888 }
4889
4890 return ret;
4891 }
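
/* For example (a hedged sketch, not code from this file): given

     void f (void) __attribute__ ((target ("avx")));
     void g (void) __attribute__ ((target ("sse2")));

   f may inline g, because g's ISA flags are a subset of f's, but g may not
   inline f, since that would let AVX instructions leak into an SSE2-only
   context.  The names f and g are made up for illustration.  */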
4892
4893 \f
4894 /* Remember the last target of ix86_set_current_function. */
4895 static GTY(()) tree ix86_previous_fndecl;
4896
4897 /* Establish appropriate back-end context for processing the function
4898 FNDECL. The argument might be NULL to indicate processing at top
4899 level, outside of any function scope. */
4900 static void
4901 ix86_set_current_function (tree fndecl)
4902 {
4903 /* Only change the context if the function changes. This hook is called
4904 several times in the course of compiling a function, and we don't want to
4905 slow things down too much or call target_reinit when it isn't safe. */
4906 if (fndecl && fndecl != ix86_previous_fndecl)
4907 {
4908 tree old_tree = (ix86_previous_fndecl
4909 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4910 : NULL_TREE);
4911
4912 tree new_tree = (fndecl
4913 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4914 : NULL_TREE);
4915
4916 ix86_previous_fndecl = fndecl;
4917 if (old_tree == new_tree)
4918 ;
4919
4920 else if (new_tree)
4921 {
4922 cl_target_option_restore (&global_options,
4923 TREE_TARGET_OPTION (new_tree));
4924 target_reinit ();
4925 }
4926
4927 else if (old_tree)
4928 {
4929 struct cl_target_option *def
4930 = TREE_TARGET_OPTION (target_option_current_node);
4931
4932 cl_target_option_restore (&global_options, def);
4933 target_reinit ();
4934 }
4935 }
4936 }
4937
4938 \f
4939 /* Return true if this goes in large data/bss. */
4940
4941 static bool
4942 ix86_in_large_data_p (tree exp)
4943 {
4944 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4945 return false;
4946
4947 /* Functions are never large data. */
4948 if (TREE_CODE (exp) == FUNCTION_DECL)
4949 return false;
4950
4951 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4952 {
4953 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4954 if (strcmp (section, ".ldata") == 0
4955 || strcmp (section, ".lbss") == 0)
4956 return true;
4957 return false;
4958 }
4959 else
4960 {
4961 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4962
4963 /* If this is an incomplete type with size 0, then we can't put it
4964 in data because it might be too big when completed. */
4965 if (!size || size > ix86_section_threshold)
4966 return true;
4967 }
4968
4969 return false;
4970 }
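
/* Rough illustration of the test above (a sketch; the threshold corresponds
   to the -mlarge-data-threshold option):

     static char big[1 << 20];     over the threshold, goes to .ldata/.lbss
     static int small;             under the threshold, stays in .data/.bss

   Only -mcmodel=medium (PIC or not) is affected; other code models never use
   the large data sections.  */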
4971
4972 /* Switch to the appropriate section for output of DECL.
4973 DECL is either a `VAR_DECL' node or a constant of some sort.
4974 RELOC indicates whether forming the initial value of DECL requires
4975 link-time relocations. */
4976
4977 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4978 ATTRIBUTE_UNUSED;
4979
4980 static section *
4981 x86_64_elf_select_section (tree decl, int reloc,
4982 unsigned HOST_WIDE_INT align)
4983 {
4984 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4985 && ix86_in_large_data_p (decl))
4986 {
4987 const char *sname = NULL;
4988 unsigned int flags = SECTION_WRITE;
4989 switch (categorize_decl_for_section (decl, reloc))
4990 {
4991 case SECCAT_DATA:
4992 sname = ".ldata";
4993 break;
4994 case SECCAT_DATA_REL:
4995 sname = ".ldata.rel";
4996 break;
4997 case SECCAT_DATA_REL_LOCAL:
4998 sname = ".ldata.rel.local";
4999 break;
5000 case SECCAT_DATA_REL_RO:
5001 sname = ".ldata.rel.ro";
5002 break;
5003 case SECCAT_DATA_REL_RO_LOCAL:
5004 sname = ".ldata.rel.ro.local";
5005 break;
5006 case SECCAT_BSS:
5007 sname = ".lbss";
5008 flags |= SECTION_BSS;
5009 break;
5010 case SECCAT_RODATA:
5011 case SECCAT_RODATA_MERGE_STR:
5012 case SECCAT_RODATA_MERGE_STR_INIT:
5013 case SECCAT_RODATA_MERGE_CONST:
5014 sname = ".lrodata";
5015 flags = 0;
5016 break;
5017 case SECCAT_SRODATA:
5018 case SECCAT_SDATA:
5019 case SECCAT_SBSS:
5020 gcc_unreachable ();
5021 case SECCAT_TEXT:
5022 case SECCAT_TDATA:
5023 case SECCAT_TBSS:
5024 /* We don't split these for medium model. Place them into
5025 	       default sections and hope for the best.  */
5026 break;
5027 }
5028 if (sname)
5029 {
5030 /* We might get called with string constants, but get_named_section
5031 doesn't like them as they are not DECLs. Also, we need to set
5032 flags in that case. */
5033 if (!DECL_P (decl))
5034 return get_section (sname, flags, NULL);
5035 return get_named_section (decl, sname, reloc);
5036 }
5037 }
5038 return default_elf_select_section (decl, reloc, align);
5039 }
5040
5041 /* Build up a unique section name, expressed as a
5042 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5043 RELOC indicates whether the initial value of EXP requires
5044 link-time relocations. */
5045
5046 static void ATTRIBUTE_UNUSED
5047 x86_64_elf_unique_section (tree decl, int reloc)
5048 {
5049 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5050 && ix86_in_large_data_p (decl))
5051 {
5052 const char *prefix = NULL;
5053 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5054 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5055
5056 switch (categorize_decl_for_section (decl, reloc))
5057 {
5058 case SECCAT_DATA:
5059 case SECCAT_DATA_REL:
5060 case SECCAT_DATA_REL_LOCAL:
5061 case SECCAT_DATA_REL_RO:
5062 case SECCAT_DATA_REL_RO_LOCAL:
5063 prefix = one_only ? ".ld" : ".ldata";
5064 break;
5065 case SECCAT_BSS:
5066 prefix = one_only ? ".lb" : ".lbss";
5067 break;
5068 case SECCAT_RODATA:
5069 case SECCAT_RODATA_MERGE_STR:
5070 case SECCAT_RODATA_MERGE_STR_INIT:
5071 case SECCAT_RODATA_MERGE_CONST:
5072 prefix = one_only ? ".lr" : ".lrodata";
5073 break;
5074 case SECCAT_SRODATA:
5075 case SECCAT_SDATA:
5076 case SECCAT_SBSS:
5077 gcc_unreachable ();
5078 case SECCAT_TEXT:
5079 case SECCAT_TDATA:
5080 case SECCAT_TBSS:
5081 /* We don't split these for medium model. Place them into
5082 	       default sections and hope for the best.  */
5083 break;
5084 }
5085 if (prefix)
5086 {
5087 const char *name, *linkonce;
5088 char *string;
5089
5090 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5091 name = targetm.strip_name_encoding (name);
5092
5093 /* If we're using one_only, then there needs to be a .gnu.linkonce
5094 prefix to the section name. */
5095 linkonce = one_only ? ".gnu.linkonce" : "";
5096
5097 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5098
5099 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5100 return;
5101 }
5102 }
5103 default_unique_section (decl, reloc);
5104 }
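
/* For instance (a sketch, assuming a variable named "counter" that
   ix86_in_large_data_p accepts): a normal definition gets the unique section
   name ".ldata.counter", while a DECL_ONE_ONLY definition without COMDAT
   group support gets ".gnu.linkonce.ld.counter".  */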
5105
5106 #ifdef COMMON_ASM_OP
5107 /* This says how to output assembler code to declare an
5108 uninitialized external linkage data object.
5109
5110 	   For medium model x86-64 we need to use the .largecomm directive for
5111 	   large objects.  */
5112 void
5113 x86_elf_aligned_common (FILE *file,
5114 const char *name, unsigned HOST_WIDE_INT size,
5115 int align)
5116 {
5117 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5118 && size > (unsigned int)ix86_section_threshold)
5119 fputs (".largecomm\t", file);
5120 else
5121 fputs (COMMON_ASM_OP, file);
5122 assemble_name (file, name);
5123 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5124 size, align / BITS_PER_UNIT);
5125 }
5126 #endif
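
/* A sketch of what this emits for a large common symbol under
   -mcmodel=medium (symbol name and size made up for illustration):

     .largecomm	big_buffer,1048576,32

   Smaller objects fall back to the ordinary COMMON_ASM_OP directive.  */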
5127
5128 /* Utility function for targets to use in implementing
5129 ASM_OUTPUT_ALIGNED_BSS. */
5130
5131 void
5132 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5133 const char *name, unsigned HOST_WIDE_INT size,
5134 int align)
5135 {
5136 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5137 && size > (unsigned int)ix86_section_threshold)
5138 switch_to_section (get_named_section (decl, ".lbss", 0));
5139 else
5140 switch_to_section (bss_section);
5141 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5142 #ifdef ASM_DECLARE_OBJECT_NAME
5143 last_assemble_variable_decl = decl;
5144 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5145 #else
5146 	  /* The standard thing is just to output a label for the object.  */
5147 ASM_OUTPUT_LABEL (file, name);
5148 #endif /* ASM_DECLARE_OBJECT_NAME */
5149 ASM_OUTPUT_SKIP (file, size ? size : 1);
5150 }
5151 \f
5152 static const struct default_options ix86_option_optimization_table[] =
5153 {
5154 /* Turn off -fschedule-insns by default. It tends to make the
5155 problem with not enough registers even worse. */
5156 #ifdef INSN_SCHEDULING
5157 { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
5158 #endif
5159
5160 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
5161 SUBTARGET_OPTIMIZATION_OPTIONS,
5162 #endif
5163 { OPT_LEVELS_NONE, 0, NULL, 0 }
5164 };
5165
5166 /* Implement TARGET_OPTION_INIT_STRUCT. */
5167
5168 static void
5169 ix86_option_init_struct (struct gcc_options *opts)
5170 {
5171 if (TARGET_MACHO)
5172 /* The Darwin libraries never set errno, so we might as well
5173 avoid calling them when that's the only reason we would. */
5174 opts->x_flag_errno_math = 0;
5175
5176 opts->x_flag_pcc_struct_return = 2;
5177 opts->x_flag_asynchronous_unwind_tables = 2;
5178 opts->x_flag_vect_cost_model = 1;
5179 }
5180
5181 /* Decide whether we must probe the stack before any space allocation
5182 on this target. It's essentially TARGET_STACK_PROBE except when
5183 -fstack-check causes the stack to be already probed differently. */
5184
5185 bool
5186 ix86_target_stack_probe (void)
5187 {
5188 /* Do not probe the stack twice if static stack checking is enabled. */
5189 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5190 return false;
5191
5192 return TARGET_STACK_PROBE;
5193 }
5194 \f
5195 /* Decide whether we can make a sibling call to a function. DECL is the
5196 declaration of the function being targeted by the call and EXP is the
5197 CALL_EXPR representing the call. */
5198
5199 static bool
5200 ix86_function_ok_for_sibcall (tree decl, tree exp)
5201 {
5202 tree type, decl_or_type;
5203 rtx a, b;
5204
5205 /* If we are generating position-independent code, we cannot sibcall
5206 optimize any indirect call, or a direct call to a global function,
5207 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5208 if (!TARGET_MACHO
5209 && !TARGET_64BIT
5210 && flag_pic
5211 && (!decl || !targetm.binds_local_p (decl)))
5212 return false;
5213
5214 /* If we need to align the outgoing stack, then sibcalling would
5215 unalign the stack, which may break the called function. */
5216 if (ix86_minimum_incoming_stack_boundary (true)
5217 < PREFERRED_STACK_BOUNDARY)
5218 return false;
5219
5220 if (decl)
5221 {
5222 decl_or_type = decl;
5223 type = TREE_TYPE (decl);
5224 }
5225 else
5226 {
5227 /* We're looking at the CALL_EXPR, we need the type of the function. */
5228 type = CALL_EXPR_FN (exp); /* pointer expression */
5229 type = TREE_TYPE (type); /* pointer type */
5230 type = TREE_TYPE (type); /* function type */
5231 decl_or_type = type;
5232 }
5233
5234 	  /* Check that the return value locations are the same.  For example,
5235 	     if we are returning floats on the 80387 register stack, we cannot
5236 make a sibcall from a function that doesn't return a float to a
5237 function that does or, conversely, from a function that does return
5238 a float to a function that doesn't; the necessary stack adjustment
5239 would not be executed. This is also the place we notice
5240 differences in the return value ABI. Note that it is ok for one
5241 of the functions to have void return type as long as the return
5242 value of the other is passed in a register. */
5243 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5244 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5245 cfun->decl, false);
5246 if (STACK_REG_P (a) || STACK_REG_P (b))
5247 {
5248 if (!rtx_equal_p (a, b))
5249 return false;
5250 }
5251 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5252 {
5253 /* Disable sibcall if we need to generate vzeroupper after
5254 callee returns. */
5255 if (TARGET_VZEROUPPER
5256 && cfun->machine->callee_return_avx256_p
5257 && !cfun->machine->caller_return_avx256_p)
5258 return false;
5259 }
5260 else if (!rtx_equal_p (a, b))
5261 return false;
5262
5263 if (TARGET_64BIT)
5264 {
5265 /* The SYSV ABI has more call-clobbered registers;
5266 disallow sibcalls from MS to SYSV. */
5267 if (cfun->machine->call_abi == MS_ABI
5268 && ix86_function_type_abi (type) == SYSV_ABI)
5269 return false;
5270 }
5271 else
5272 {
5273 /* If this call is indirect, we'll need to be able to use a
5274 call-clobbered register for the address of the target function.
5275 Make sure that all such registers are not used for passing
5276 parameters. Note that DLLIMPORT functions are indirect. */
5277 if (!decl
5278 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5279 {
5280 if (ix86_function_regparm (type, NULL) >= 3)
5281 {
5282 /* ??? Need to count the actual number of registers to be used,
5283 not the possible number of registers. Fix later. */
5284 return false;
5285 }
5286 }
5287 }
5288
5289 /* Otherwise okay. That also includes certain types of indirect calls. */
5290 return true;
5291 }
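
/* As a concrete example of the PIC restriction above (a sketch, not a claim
   about any particular caller): in 32-bit PIC code a tail call to an
   ordinary global function would have to go through the PLT, which requires
   %ebx to hold the GOT pointer, so such calls are emitted as regular calls
   rather than sibcalls.  */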
5292
5293 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5294 and "sseregparm" calling convention attributes;
5295 arguments as in struct attribute_spec.handler. */
5296
5297 static tree
5298 ix86_handle_cconv_attribute (tree *node, tree name,
5299 tree args,
5300 int flags ATTRIBUTE_UNUSED,
5301 bool *no_add_attrs)
5302 {
5303 if (TREE_CODE (*node) != FUNCTION_TYPE
5304 && TREE_CODE (*node) != METHOD_TYPE
5305 && TREE_CODE (*node) != FIELD_DECL
5306 && TREE_CODE (*node) != TYPE_DECL)
5307 {
5308 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5309 name);
5310 *no_add_attrs = true;
5311 return NULL_TREE;
5312 }
5313
5314 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5315 if (is_attribute_p ("regparm", name))
5316 {
5317 tree cst;
5318
5319 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5320 {
5321 error ("fastcall and regparm attributes are not compatible");
5322 }
5323
5324 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5325 {
5326 	  error ("regparm and thiscall attributes are not compatible");
5327 }
5328
5329 cst = TREE_VALUE (args);
5330 if (TREE_CODE (cst) != INTEGER_CST)
5331 {
5332 warning (OPT_Wattributes,
5333 "%qE attribute requires an integer constant argument",
5334 name);
5335 *no_add_attrs = true;
5336 }
5337 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5338 {
5339 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5340 name, REGPARM_MAX);
5341 *no_add_attrs = true;
5342 }
5343
5344 return NULL_TREE;
5345 }
5346
5347 if (TARGET_64BIT)
5348 {
5349 /* Do not warn when emulating the MS ABI. */
5350 if ((TREE_CODE (*node) != FUNCTION_TYPE
5351 && TREE_CODE (*node) != METHOD_TYPE)
5352 || ix86_function_type_abi (*node) != MS_ABI)
5353 warning (OPT_Wattributes, "%qE attribute ignored",
5354 name);
5355 *no_add_attrs = true;
5356 return NULL_TREE;
5357 }
5358
5359 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5360 if (is_attribute_p ("fastcall", name))
5361 {
5362 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5363 {
5364 error ("fastcall and cdecl attributes are not compatible");
5365 }
5366 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5367 {
5368 error ("fastcall and stdcall attributes are not compatible");
5369 }
5370 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5371 {
5372 error ("fastcall and regparm attributes are not compatible");
5373 }
5374 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5375 {
5376 error ("fastcall and thiscall attributes are not compatible");
5377 }
5378 }
5379
5380 /* Can combine stdcall with fastcall (redundant), regparm and
5381 sseregparm. */
5382 else if (is_attribute_p ("stdcall", name))
5383 {
5384 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5385 {
5386 error ("stdcall and cdecl attributes are not compatible");
5387 }
5388 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5389 {
5390 error ("stdcall and fastcall attributes are not compatible");
5391 }
5392 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5393 {
5394 error ("stdcall and thiscall attributes are not compatible");
5395 }
5396 }
5397
5398 /* Can combine cdecl with regparm and sseregparm. */
5399 else if (is_attribute_p ("cdecl", name))
5400 {
5401 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5402 {
5403 error ("stdcall and cdecl attributes are not compatible");
5404 }
5405 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5406 {
5407 error ("fastcall and cdecl attributes are not compatible");
5408 }
5409 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5410 {
5411 error ("cdecl and thiscall attributes are not compatible");
5412 }
5413 }
5414 else if (is_attribute_p ("thiscall", name))
5415 {
5416 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5417 	    warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5418 name);
5419 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5420 {
5421 error ("stdcall and thiscall attributes are not compatible");
5422 }
5423 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5424 {
5425 error ("fastcall and thiscall attributes are not compatible");
5426 }
5427 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5428 {
5429 error ("cdecl and thiscall attributes are not compatible");
5430 }
5431 }
5432
5433 /* Can combine sseregparm with all attributes. */
5434
5435 return NULL_TREE;
5436 }
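
/* Illustrative declarations (a hedged sketch of what the handler above
   accepts and rejects; the function names are made up):

     int __attribute__ ((fastcall)) f1 (int, int);              accepted
     int __attribute__ ((stdcall, regparm (2))) f2 (int);        accepted
     int __attribute__ ((fastcall, regparm (2))) f3 (int);       rejected

   The last one triggers the "fastcall and regparm attributes are not
   compatible" error.  */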
5437
5438 /* This function determines from TYPE the calling-convention. */
5439
5440 unsigned int
5441 ix86_get_callcvt (const_tree type)
5442 {
5443 unsigned int ret = 0;
5444 bool is_stdarg;
5445 tree attrs;
5446
5447 if (TARGET_64BIT)
5448 return IX86_CALLCVT_CDECL;
5449
5450 attrs = TYPE_ATTRIBUTES (type);
5451 if (attrs != NULL_TREE)
5452 {
5453 if (lookup_attribute ("cdecl", attrs))
5454 ret |= IX86_CALLCVT_CDECL;
5455 else if (lookup_attribute ("stdcall", attrs))
5456 ret |= IX86_CALLCVT_STDCALL;
5457 else if (lookup_attribute ("fastcall", attrs))
5458 ret |= IX86_CALLCVT_FASTCALL;
5459 else if (lookup_attribute ("thiscall", attrs))
5460 ret |= IX86_CALLCVT_THISCALL;
5461
5462 	      /* Regparm isn't allowed for thiscall and fastcall.  */
5463 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5464 {
5465 if (lookup_attribute ("regparm", attrs))
5466 ret |= IX86_CALLCVT_REGPARM;
5467 if (lookup_attribute ("sseregparm", attrs))
5468 ret |= IX86_CALLCVT_SSEREGPARM;
5469 }
5470
5471 if (IX86_BASE_CALLCVT(ret) != 0)
5472 return ret;
5473 }
5474
5475 is_stdarg = stdarg_p (type);
5476 if (TARGET_RTD && !is_stdarg)
5477 return IX86_CALLCVT_STDCALL | ret;
5478
5479 if (ret != 0
5480 || is_stdarg
5481 || TREE_CODE (type) != METHOD_TYPE
5482 || ix86_function_type_abi (type) != MS_ABI)
5483 return IX86_CALLCVT_CDECL | ret;
5484
5485 return IX86_CALLCVT_THISCALL;
5486 }
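
/* A sketch of how the lookup above behaves in 32-bit mode (assuming no -mrtd
   and the default SYSV ABI): a prototype marked stdcall yields
   IX86_CALLCVT_STDCALL, one marked fastcall yields IX86_CALLCVT_FASTCALL,
   and an unadorned prototype, or any stdarg function, falls back to
   IX86_CALLCVT_CDECL.  In 64-bit mode everything is reported as CDECL.  */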
5487
5488 /* Return 0 if the attributes for two types are incompatible, 1 if they
5489 are compatible, and 2 if they are nearly compatible (which causes a
5490 warning to be generated). */
5491
5492 static int
5493 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5494 {
5495 unsigned int ccvt1, ccvt2;
5496
5497 if (TREE_CODE (type1) != FUNCTION_TYPE
5498 && TREE_CODE (type1) != METHOD_TYPE)
5499 return 1;
5500
5501 ccvt1 = ix86_get_callcvt (type1);
5502 ccvt2 = ix86_get_callcvt (type2);
5503 if (ccvt1 != ccvt2)
5504 return 0;
5505 if (ix86_function_regparm (type1, NULL)
5506 != ix86_function_regparm (type2, NULL))
5507 return 0;
5508
5509 return 1;
5510 }
5511 \f
5512 /* Return the regparm value for a function with the indicated TYPE and DECL.
5513 DECL may be NULL when calling function indirectly
5514 or considering a libcall. */
5515
5516 static int
5517 ix86_function_regparm (const_tree type, const_tree decl)
5518 {
5519 tree attr;
5520 int regparm;
5521 unsigned int ccvt;
5522
5523 if (TARGET_64BIT)
5524 return (ix86_function_type_abi (type) == SYSV_ABI
5525 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5526 ccvt = ix86_get_callcvt (type);
5527 regparm = ix86_regparm;
5528
5529 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5530 {
5531 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5532 if (attr)
5533 {
5534 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5535 return regparm;
5536 }
5537 }
5538 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5539 return 2;
5540 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5541 return 1;
5542
5543 /* Use register calling convention for local functions when possible. */
5544 if (decl
5545 && TREE_CODE (decl) == FUNCTION_DECL
5546 && optimize
5547 && !(profile_flag && !flag_fentry))
5548 {
5549 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5550 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5551 if (i && i->local && i->can_change_signature)
5552 {
5553 int local_regparm, globals = 0, regno;
5554
5555 /* Make sure no regparm register is taken by a
5556 fixed register variable. */
5557 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5558 if (fixed_regs[local_regparm])
5559 break;
5560
5561 /* We don't want to use regparm(3) for nested functions as
5562 these use a static chain pointer in the third argument. */
5563 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5564 local_regparm = 2;
5565
5566 /* In 32-bit mode save a register for the split stack. */
5567 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5568 local_regparm = 2;
5569
5570 /* Each fixed register usage increases register pressure,
5571 	         so fewer registers should be used for argument passing.
5572 	         This functionality can be overridden by an explicit
5573 regparm value. */
5574 for (regno = 0; regno <= DI_REG; regno++)
5575 if (fixed_regs[regno])
5576 globals++;
5577
5578 local_regparm
5579 = globals < local_regparm ? local_regparm - globals : 0;
5580
5581 if (local_regparm > regparm)
5582 regparm = local_regparm;
5583 }
5584 }
5585
5586 return regparm;
5587 }
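
/* Example of the explicit form handled above (a user-level sketch, not code
   from this file):

     int __attribute__ ((regparm (3))) madd (int a, int b, int c);

   asks for the first three integral arguments to be passed in EAX, EDX and
   ECX instead of on the stack; the local-function heuristic above can arrange
   the same thing automatically for static functions when optimizing.  */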
5588
5589 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5590 DFmode (2) arguments in SSE registers for a function with the
5591 indicated TYPE and DECL. DECL may be NULL when calling function
5592 indirectly or considering a libcall. Otherwise return 0. */
5593
5594 static int
5595 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5596 {
5597 gcc_assert (!TARGET_64BIT);
5598
5599 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5600 by the sseregparm attribute. */
5601 if (TARGET_SSEREGPARM
5602 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5603 {
5604 if (!TARGET_SSE)
5605 {
5606 if (warn)
5607 {
5608 if (decl)
5609 error ("calling %qD with attribute sseregparm without "
5610 "SSE/SSE2 enabled", decl);
5611 else
5612 error ("calling %qT with attribute sseregparm without "
5613 "SSE/SSE2 enabled", type);
5614 }
5615 return 0;
5616 }
5617
5618 return 2;
5619 }
5620
5621 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5622 (and DFmode for SSE2) arguments in SSE registers. */
5623 if (decl && TARGET_SSE_MATH && optimize
5624 && !(profile_flag && !flag_fentry))
5625 {
5626 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5627 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5628 if (i && i->local && i->can_change_signature)
5629 return TARGET_SSE2 ? 2 : 1;
5630 }
5631
5632 return 0;
5633 }
5634
5635 /* Return true if EAX is live at the start of the function. Used by
5636 ix86_expand_prologue to determine if we need special help before
5637 calling allocate_stack_worker. */
5638
5639 static bool
5640 ix86_eax_live_at_start_p (void)
5641 {
5642 /* Cheat. Don't bother working forward from ix86_function_regparm
5643 to the function type to whether an actual argument is located in
5644 eax. Instead just look at cfg info, which is still close enough
5645 to correct at this point. This gives false positives for broken
5646 functions that might use uninitialized data that happens to be
5647 allocated in eax, but who cares? */
5648 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5649 }
5650
5651 static bool
5652 ix86_keep_aggregate_return_pointer (tree fntype)
5653 {
5654 tree attr;
5655
5656 if (!TARGET_64BIT)
5657 {
5658 attr = lookup_attribute ("callee_pop_aggregate_return",
5659 TYPE_ATTRIBUTES (fntype));
5660 if (attr)
5661 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5662
5663 /* For 32-bit MS-ABI the default is to keep aggregate
5664 return pointer. */
5665 if (ix86_function_type_abi (fntype) == MS_ABI)
5666 return true;
5667 }
5668 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5669 }
5670
5671 /* Value is the number of bytes of arguments automatically
5672 popped when returning from a subroutine call.
5673 FUNDECL is the declaration node of the function (as a tree),
5674 FUNTYPE is the data type of the function (as a tree),
5675 or for a library call it is an identifier node for the subroutine name.
5676 SIZE is the number of bytes of arguments passed on the stack.
5677
5678 On the 80386, the RTD insn may be used to pop them if the number
5679 of args is fixed, but if the number is variable then the caller
5680 must pop them all. RTD can't be used for library calls now
5681 because the library is compiled with the Unix compiler.
5682 Use of RTD is a selectable option, since it is incompatible with
5683 standard Unix calling sequences. If the option is not selected,
5684 the caller must always pop the args.
5685
5686 The attribute stdcall is equivalent to RTD on a per module basis. */
5687
5688 static int
5689 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5690 {
5691 unsigned int ccvt;
5692
5693 /* None of the 64-bit ABIs pop arguments. */
5694 if (TARGET_64BIT)
5695 return 0;
5696
5697 ccvt = ix86_get_callcvt (funtype);
5698
5699 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5700 | IX86_CALLCVT_THISCALL)) != 0
5701 && ! stdarg_p (funtype))
5702 return size;
5703
5704 /* Lose any fake structure return argument if it is passed on the stack. */
5705 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5706 && !ix86_keep_aggregate_return_pointer (funtype))
5707 {
5708 int nregs = ix86_function_regparm (funtype, fundecl);
5709 if (nregs == 0)
5710 return GET_MODE_SIZE (Pmode);
5711 }
5712
5713 return 0;
5714 }
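
/* For instance (a sketch): a 32-bit function declared

     void __attribute__ ((stdcall)) cb (int a, int b);

   has 8 bytes of stack arguments and no varargs, so this hook returns 8 and
   the epilogue pops them with "ret $8"; a cdecl function returns 0 and leaves
   argument popping to the caller.  */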
5715 \f
5716 /* Argument support functions. */
5717
5718 /* Return true when register may be used to pass function parameters. */
5719 bool
5720 ix86_function_arg_regno_p (int regno)
5721 {
5722 int i;
5723 const int *parm_regs;
5724
5725 if (!TARGET_64BIT)
5726 {
5727 if (TARGET_MACHO)
5728 return (regno < REGPARM_MAX
5729 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5730 else
5731 return (regno < REGPARM_MAX
5732 || (TARGET_MMX && MMX_REGNO_P (regno)
5733 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5734 || (TARGET_SSE && SSE_REGNO_P (regno)
5735 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5736 }
5737
5738 if (TARGET_MACHO)
5739 {
5740 if (SSE_REGNO_P (regno) && TARGET_SSE)
5741 return true;
5742 }
5743 else
5744 {
5745 if (TARGET_SSE && SSE_REGNO_P (regno)
5746 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5747 return true;
5748 }
5749
5750 /* TODO: The function should depend on current function ABI but
5751 builtins.c would need updating then. Therefore we use the
5752 default ABI. */
5753
5754 /* RAX is used as hidden argument to va_arg functions. */
5755 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5756 return true;
5757
5758 if (ix86_abi == MS_ABI)
5759 parm_regs = x86_64_ms_abi_int_parameter_registers;
5760 else
5761 parm_regs = x86_64_int_parameter_registers;
5762 for (i = 0; i < (ix86_abi == MS_ABI
5763 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5764 if (regno == parm_regs[i])
5765 return true;
5766 return false;
5767 }
5768
5769 	/* Return true if we do not know how to pass TYPE solely in registers.  */
5770
5771 static bool
5772 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5773 {
5774 if (must_pass_in_stack_var_size_or_pad (mode, type))
5775 return true;
5776
5777 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5778 The layout_type routine is crafty and tries to trick us into passing
5779 currently unsupported vector types on the stack by using TImode. */
5780 return (!TARGET_64BIT && mode == TImode
5781 && type && TREE_CODE (type) != VECTOR_TYPE);
5782 }
5783
5784 	/* Return the size, in bytes, of the area reserved for arguments passed
5785 	   in registers for the function represented by FNDECL, depending on the
5786 	   ABI used.  */
5787 int
5788 ix86_reg_parm_stack_space (const_tree fndecl)
5789 {
5790 enum calling_abi call_abi = SYSV_ABI;
5791 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5792 call_abi = ix86_function_abi (fndecl);
5793 else
5794 call_abi = ix86_function_type_abi (fndecl);
5795 if (TARGET_64BIT && call_abi == MS_ABI)
5796 return 32;
5797 return 0;
5798 }
5799
5800 	/* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5801 	   call ABI used.  */
5802 enum calling_abi
5803 ix86_function_type_abi (const_tree fntype)
5804 {
5805 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5806 {
5807 enum calling_abi abi = ix86_abi;
5808 if (abi == SYSV_ABI)
5809 {
5810 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5811 abi = MS_ABI;
5812 }
5813 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5814 abi = SYSV_ABI;
5815 return abi;
5816 }
5817 return ix86_abi;
5818 }
5819
5820 static bool
5821 ix86_function_ms_hook_prologue (const_tree fn)
5822 {
5823 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5824 {
5825 if (decl_function_context (fn) != NULL_TREE)
5826 error_at (DECL_SOURCE_LOCATION (fn),
5827 "ms_hook_prologue is not compatible with nested function");
5828 else
5829 return true;
5830 }
5831 return false;
5832 }
5833
5834 static enum calling_abi
5835 ix86_function_abi (const_tree fndecl)
5836 {
5837 if (! fndecl)
5838 return ix86_abi;
5839 return ix86_function_type_abi (TREE_TYPE (fndecl));
5840 }
5841
5842 	/* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
5843 	   call ABI used.  */
5844 enum calling_abi
5845 ix86_cfun_abi (void)
5846 {
5847 if (! cfun)
5848 return ix86_abi;
5849 return cfun->machine->call_abi;
5850 }
5851
5852 /* Write the extra assembler code needed to declare a function properly. */
5853
5854 void
5855 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5856 tree decl)
5857 {
5858 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5859
5860 if (is_ms_hook)
5861 {
5862 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5863 unsigned int filler_cc = 0xcccccccc;
5864
5865 for (i = 0; i < filler_count; i += 4)
5866 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5867 }
5868
5869 #ifdef SUBTARGET_ASM_UNWIND_INIT
5870 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5871 #endif
5872
5873 ASM_OUTPUT_LABEL (asm_out_file, fname);
5874
5875 /* Output magic byte marker, if hot-patch attribute is set. */
5876 if (is_ms_hook)
5877 {
5878 if (TARGET_64BIT)
5879 {
5880 /* leaq [%rsp + 0], %rsp */
5881 asm_fprintf (asm_out_file, ASM_BYTE
5882 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5883 }
5884 else
5885 {
5886 /* movl.s %edi, %edi
5887 push %ebp
5888 movl.s %esp, %ebp */
5889 asm_fprintf (asm_out_file, ASM_BYTE
5890 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5891 }
5892 }
5893 }
5894
5895 /* regclass.c */
5896 extern void init_regs (void);
5897
5898 	/* Implementation of the call ABI switching target hook.  The call
5899 	   register sets specific to FNDECL are selected.  See also
5900 	   ix86_conditional_register_usage for more details.  */
5901 void
5902 ix86_call_abi_override (const_tree fndecl)
5903 {
5904 if (fndecl == NULL_TREE)
5905 cfun->machine->call_abi = ix86_abi;
5906 else
5907 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5908 }
5909
5910 	/* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
5911 	   Avoid expensive re-initialization of init_regs each time we switch function
5912 	   context, since this is needed only during RTL expansion.  */
5913 static void
5914 ix86_maybe_switch_abi (void)
5915 {
5916 	  if (TARGET_64BIT
5917 	      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5918 reinit_regs ();
5919 }
5920
5921 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5922 for a call to a function whose data type is FNTYPE.
5923 For a library call, FNTYPE is 0. */
5924
5925 void
5926 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5927 tree fntype, /* tree ptr for function decl */
5928 rtx libname, /* SYMBOL_REF of library name or 0 */
5929 tree fndecl,
5930 int caller)
5931 {
5932 struct cgraph_local_info *i;
5933 tree fnret_type;
5934
5935 memset (cum, 0, sizeof (*cum));
5936
5937 /* Initialize for the current callee. */
5938 if (caller)
5939 {
5940 cfun->machine->callee_pass_avx256_p = false;
5941 cfun->machine->callee_return_avx256_p = false;
5942 }
5943
5944 if (fndecl)
5945 {
5946 i = cgraph_local_info (fndecl);
5947 cum->call_abi = ix86_function_abi (fndecl);
5948 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5949 }
5950 else
5951 {
5952 i = NULL;
5953 cum->call_abi = ix86_function_type_abi (fntype);
5954 if (fntype)
5955 fnret_type = TREE_TYPE (fntype);
5956 else
5957 fnret_type = NULL;
5958 }
5959
5960 if (TARGET_VZEROUPPER && fnret_type)
5961 {
5962 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5963 false);
5964 if (function_pass_avx256_p (fnret_value))
5965 {
5966 /* The return value of this function uses 256bit AVX modes. */
5967 if (caller)
5968 cfun->machine->callee_return_avx256_p = true;
5969 else
5970 cfun->machine->caller_return_avx256_p = true;
5971 }
5972 }
5973
5974 cum->caller = caller;
5975
5976 /* Set up the number of registers to use for passing arguments. */
5977
5978 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5979 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5980 "or subtarget optimization implying it");
5981 cum->nregs = ix86_regparm;
5982 if (TARGET_64BIT)
5983 {
5984 cum->nregs = (cum->call_abi == SYSV_ABI
5985 ? X86_64_REGPARM_MAX
5986 : X86_64_MS_REGPARM_MAX);
5987 }
5988 if (TARGET_SSE)
5989 {
5990 cum->sse_nregs = SSE_REGPARM_MAX;
5991 if (TARGET_64BIT)
5992 {
5993 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5994 ? X86_64_SSE_REGPARM_MAX
5995 : X86_64_MS_SSE_REGPARM_MAX);
5996 }
5997 }
5998 if (TARGET_MMX)
5999 cum->mmx_nregs = MMX_REGPARM_MAX;
6000 cum->warn_avx = true;
6001 cum->warn_sse = true;
6002 cum->warn_mmx = true;
6003
6004 	  /* Because types might mismatch between caller and callee, we need to
6005 	     use the actual type of the function for local calls.
6006 	     FIXME: cgraph_analyze can be told to actually record if the function
6007 	     uses va_start, so for local functions maybe_vaarg can be made more
6008 	     aggressive, helping K&R code.
6009 	     FIXME: once the type system is fixed, we won't need this code anymore.  */
6010 if (i && i->local && i->can_change_signature)
6011 fntype = TREE_TYPE (fndecl);
6012 cum->maybe_vaarg = (fntype
6013 ? (!prototype_p (fntype) || stdarg_p (fntype))
6014 : !libname);
6015
6016 if (!TARGET_64BIT)
6017 {
6018 /* If there are variable arguments, then we won't pass anything
6019 in registers in 32-bit mode. */
6020 if (stdarg_p (fntype))
6021 {
6022 cum->nregs = 0;
6023 cum->sse_nregs = 0;
6024 cum->mmx_nregs = 0;
6025 cum->warn_avx = 0;
6026 cum->warn_sse = 0;
6027 cum->warn_mmx = 0;
6028 return;
6029 }
6030
6031 /* Use ecx and edx registers if function has fastcall attribute,
6032 else look for regparm information. */
6033 if (fntype)
6034 {
6035 unsigned int ccvt = ix86_get_callcvt (fntype);
6036 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6037 {
6038 cum->nregs = 1;
6039 cum->fastcall = 1; /* Same first register as in fastcall. */
6040 }
6041 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6042 {
6043 cum->nregs = 2;
6044 cum->fastcall = 1;
6045 }
6046 else
6047 cum->nregs = ix86_function_regparm (fntype, fndecl);
6048 }
6049
6050 /* Set up the number of SSE registers used for passing SFmode
6051 and DFmode arguments. Warn for mismatching ABI. */
6052 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6053 }
6054 }
6055
6056 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6057 But in the case of vector types, it is some vector mode.
6058
6059 When we have only some of our vector isa extensions enabled, then there
6060 are some modes for which vector_mode_supported_p is false. For these
6061 modes, the generic vector support in gcc will choose some non-vector mode
6062 in order to implement the type. By computing the natural mode, we'll
6063 select the proper ABI location for the operand and not depend on whatever
6064 the middle-end decides to do with these vector types.
6065
6066 	   The middle-end can't deal with vector types > 16 bytes.  In this
6067 	   case, we return the original mode and warn about the ABI change if
6068 	   CUM isn't NULL.  */
6069
6070 static enum machine_mode
6071 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
6072 {
6073 enum machine_mode mode = TYPE_MODE (type);
6074
6075 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6076 {
6077 HOST_WIDE_INT size = int_size_in_bytes (type);
6078 if ((size == 8 || size == 16 || size == 32)
6079 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6080 && TYPE_VECTOR_SUBPARTS (type) > 1)
6081 {
6082 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6083
6084 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6085 mode = MIN_MODE_VECTOR_FLOAT;
6086 else
6087 mode = MIN_MODE_VECTOR_INT;
6088
6089 /* Get the mode which has this inner mode and number of units. */
6090 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6091 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6092 && GET_MODE_INNER (mode) == innermode)
6093 {
6094 if (size == 32 && !TARGET_AVX)
6095 {
6096 static bool warnedavx;
6097
6098 if (cum
6099 && !warnedavx
6100 && cum->warn_avx)
6101 {
6102 warnedavx = true;
6103 warning (0, "AVX vector argument without AVX "
6104 "enabled changes the ABI");
6105 }
6106 return TYPE_MODE (type);
6107 }
6108 else
6109 return mode;
6110 }
6111
6112 gcc_unreachable ();
6113 }
6114 }
6115
6116 return mode;
6117 }
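
/* For example (a sketch using the GNU vector_size extension):

     typedef int v4si __attribute__ ((vector_size (16)));

   has the natural mode V4SImode.  With AVX disabled, a 32-byte vector type
   is instead returned in its original (non-vector) mode, accompanied by the
   "AVX vector argument without AVX enabled" warning above.  */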
6118
6119 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6120 this may not agree with the mode that the type system has chosen for the
6121 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6122 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6123
6124 static rtx
6125 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6126 unsigned int regno)
6127 {
6128 rtx tmp;
6129
6130 if (orig_mode != BLKmode)
6131 tmp = gen_rtx_REG (orig_mode, regno);
6132 else
6133 {
6134 tmp = gen_rtx_REG (mode, regno);
6135 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6136 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6137 }
6138
6139 return tmp;
6140 }
6141
6142 	/* x86-64 register passing implementation.  See the x86-64 ABI for details.
6143 	   The goal of this code is to classify each 8 bytes of an incoming argument
6144 	   by register class and assign registers accordingly.  */
6145
6146 /* Return the union class of CLASS1 and CLASS2.
6147 See the x86-64 PS ABI for details. */
6148
6149 static enum x86_64_reg_class
6150 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6151 {
6152 /* Rule #1: If both classes are equal, this is the resulting class. */
6153 if (class1 == class2)
6154 return class1;
6155
6156 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6157 the other class. */
6158 if (class1 == X86_64_NO_CLASS)
6159 return class2;
6160 if (class2 == X86_64_NO_CLASS)
6161 return class1;
6162
6163 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6164 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6165 return X86_64_MEMORY_CLASS;
6166
6167 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6168 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6169 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6170 return X86_64_INTEGERSI_CLASS;
6171 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6172 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6173 return X86_64_INTEGER_CLASS;
6174
6175 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6176 MEMORY is used. */
6177 if (class1 == X86_64_X87_CLASS
6178 || class1 == X86_64_X87UP_CLASS
6179 || class1 == X86_64_COMPLEX_X87_CLASS
6180 || class2 == X86_64_X87_CLASS
6181 || class2 == X86_64_X87UP_CLASS
6182 || class2 == X86_64_COMPLEX_X87_CLASS)
6183 return X86_64_MEMORY_CLASS;
6184
6185 /* Rule #6: Otherwise class SSE is used. */
6186 return X86_64_SSE_CLASS;
6187 }
6188
6189 /* Classify the argument of type TYPE and mode MODE.
6190 CLASSES will be filled by the register class used to pass each word
6191 of the operand. The number of words is returned. In case the parameter
6192 should be passed in memory, 0 is returned. As a special case for zero
6193 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6194
6195 	   BIT_OFFSET is used internally for handling records and specifies the
6196 	   offset in bits modulo 256, to avoid overflow cases.
6197
6198 See the x86-64 PS ABI for details.
6199 */
6200
6201 static int
6202 classify_argument (enum machine_mode mode, const_tree type,
6203 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6204 {
6205 HOST_WIDE_INT bytes =
6206 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6207 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6208
6209 /* Variable sized entities are always passed/returned in memory. */
6210 if (bytes < 0)
6211 return 0;
6212
6213 if (mode != VOIDmode
6214 && targetm.calls.must_pass_in_stack (mode, type))
6215 return 0;
6216
6217 if (type && AGGREGATE_TYPE_P (type))
6218 {
6219 int i;
6220 tree field;
6221 enum x86_64_reg_class subclasses[MAX_CLASSES];
6222
6223 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6224 if (bytes > 32)
6225 return 0;
6226
6227 for (i = 0; i < words; i++)
6228 classes[i] = X86_64_NO_CLASS;
6229
6230 	      /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
6231 	         signal the memory class, so handle it as a special case.  */
6232 if (!words)
6233 {
6234 classes[0] = X86_64_NO_CLASS;
6235 return 1;
6236 }
6237
6238 /* Classify each field of record and merge classes. */
6239 switch (TREE_CODE (type))
6240 {
6241 case RECORD_TYPE:
6242 /* And now merge the fields of structure. */
6243 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6244 {
6245 if (TREE_CODE (field) == FIELD_DECL)
6246 {
6247 int num;
6248
6249 if (TREE_TYPE (field) == error_mark_node)
6250 continue;
6251
6252 /* Bitfields are always classified as integer. Handle them
6253 early, since later code would consider them to be
6254 misaligned integers. */
6255 if (DECL_BIT_FIELD (field))
6256 {
6257 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6258 i < ((int_bit_position (field) + (bit_offset % 64))
6259 + tree_low_cst (DECL_SIZE (field), 0)
6260 + 63) / 8 / 8; i++)
6261 classes[i] =
6262 merge_classes (X86_64_INTEGER_CLASS,
6263 classes[i]);
6264 }
6265 else
6266 {
6267 int pos;
6268
6269 type = TREE_TYPE (field);
6270
6271 /* Flexible array member is ignored. */
6272 if (TYPE_MODE (type) == BLKmode
6273 && TREE_CODE (type) == ARRAY_TYPE
6274 && TYPE_SIZE (type) == NULL_TREE
6275 && TYPE_DOMAIN (type) != NULL_TREE
6276 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6277 == NULL_TREE))
6278 {
6279 static bool warned;
6280
6281 if (!warned && warn_psabi)
6282 {
6283 warned = true;
6284 inform (input_location,
6285 "the ABI of passing struct with"
6286 " a flexible array member has"
6287 " changed in GCC 4.4");
6288 }
6289 continue;
6290 }
6291 num = classify_argument (TYPE_MODE (type), type,
6292 subclasses,
6293 (int_bit_position (field)
6294 + bit_offset) % 256);
6295 if (!num)
6296 return 0;
6297 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6298 for (i = 0; i < num && (i + pos) < words; i++)
6299 classes[i + pos] =
6300 merge_classes (subclasses[i], classes[i + pos]);
6301 }
6302 }
6303 }
6304 break;
6305
6306 case ARRAY_TYPE:
6307 /* Arrays are handled as small records. */
6308 {
6309 int num;
6310 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6311 TREE_TYPE (type), subclasses, bit_offset);
6312 if (!num)
6313 return 0;
6314
6315 /* The partial classes are now full classes. */
6316 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6317 subclasses[0] = X86_64_SSE_CLASS;
6318 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6319 && !((bit_offset % 64) == 0 && bytes == 4))
6320 subclasses[0] = X86_64_INTEGER_CLASS;
6321
6322 for (i = 0; i < words; i++)
6323 classes[i] = subclasses[i % num];
6324
6325 break;
6326 }
6327 case UNION_TYPE:
6328 case QUAL_UNION_TYPE:
6329 /* Unions are similar to RECORD_TYPE but offset is always 0.
6330 */
6331 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6332 {
6333 if (TREE_CODE (field) == FIELD_DECL)
6334 {
6335 int num;
6336
6337 if (TREE_TYPE (field) == error_mark_node)
6338 continue;
6339
6340 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6341 TREE_TYPE (field), subclasses,
6342 bit_offset);
6343 if (!num)
6344 return 0;
6345 for (i = 0; i < num; i++)
6346 classes[i] = merge_classes (subclasses[i], classes[i]);
6347 }
6348 }
6349 break;
6350
6351 default:
6352 gcc_unreachable ();
6353 }
6354
6355 if (words > 2)
6356 {
6357 	      /* When size > 16 bytes, if the first class isn't
6358 	         X86_64_SSE_CLASS or any of the other classes isn't
6359 	         X86_64_SSEUP_CLASS, everything should be passed in
6360 memory. */
6361 if (classes[0] != X86_64_SSE_CLASS)
6362 return 0;
6363
6364 for (i = 1; i < words; i++)
6365 if (classes[i] != X86_64_SSEUP_CLASS)
6366 return 0;
6367 }
6368
6369 /* Final merger cleanup. */
6370 for (i = 0; i < words; i++)
6371 {
6372 /* If one class is MEMORY, everything should be passed in
6373 memory. */
6374 if (classes[i] == X86_64_MEMORY_CLASS)
6375 return 0;
6376
6377 	      /* X86_64_SSEUP_CLASS should always be preceded by
6378 	         X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
6379 if (classes[i] == X86_64_SSEUP_CLASS
6380 && classes[i - 1] != X86_64_SSE_CLASS
6381 && classes[i - 1] != X86_64_SSEUP_CLASS)
6382 {
6383 /* The first one should never be X86_64_SSEUP_CLASS. */
6384 gcc_assert (i != 0);
6385 classes[i] = X86_64_SSE_CLASS;
6386 }
6387
6388 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6389 everything should be passed in memory. */
6390 if (classes[i] == X86_64_X87UP_CLASS
6391 && (classes[i - 1] != X86_64_X87_CLASS))
6392 {
6393 static bool warned;
6394
6395 /* The first one should never be X86_64_X87UP_CLASS. */
6396 gcc_assert (i != 0);
6397 if (!warned && warn_psabi)
6398 {
6399 warned = true;
6400 inform (input_location,
6401 "the ABI of passing union with long double"
6402 " has changed in GCC 4.4");
6403 }
6404 return 0;
6405 }
6406 }
6407 return words;
6408 }
6409
6410 	  /* Compute the alignment needed.  We align all types to natural boundaries,
6411 	     with the exception of XFmode, which is aligned to 64 bits.  */
6412 if (mode != VOIDmode && mode != BLKmode)
6413 {
6414 int mode_alignment = GET_MODE_BITSIZE (mode);
6415
6416 if (mode == XFmode)
6417 mode_alignment = 128;
6418 else if (mode == XCmode)
6419 mode_alignment = 256;
6420 if (COMPLEX_MODE_P (mode))
6421 mode_alignment /= 2;
6422 /* Misaligned fields are always returned in memory. */
6423 if (bit_offset % mode_alignment)
6424 return 0;
6425 }
6426
6427 	  /* For V1xx modes, just use the base mode.  */
6428 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6429 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6430 mode = GET_MODE_INNER (mode);
6431
6432 /* Classification of atomic types. */
6433 switch (mode)
6434 {
6435 case SDmode:
6436 case DDmode:
6437 classes[0] = X86_64_SSE_CLASS;
6438 return 1;
6439 case TDmode:
6440 classes[0] = X86_64_SSE_CLASS;
6441 classes[1] = X86_64_SSEUP_CLASS;
6442 return 2;
6443 case DImode:
6444 case SImode:
6445 case HImode:
6446 case QImode:
6447 case CSImode:
6448 case CHImode:
6449 case CQImode:
6450 {
6451 	        int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6452
6453 if (size <= 32)
6454 {
6455 classes[0] = X86_64_INTEGERSI_CLASS;
6456 return 1;
6457 }
6458 else if (size <= 64)
6459 {
6460 classes[0] = X86_64_INTEGER_CLASS;
6461 return 1;
6462 }
6463 else if (size <= 64+32)
6464 {
6465 classes[0] = X86_64_INTEGER_CLASS;
6466 classes[1] = X86_64_INTEGERSI_CLASS;
6467 return 2;
6468 }
6469 else if (size <= 64+64)
6470 {
6471 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6472 return 2;
6473 }
6474 else
6475 gcc_unreachable ();
6476 }
6477 case CDImode:
6478 case TImode:
6479 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6480 return 2;
6481 case COImode:
6482 case OImode:
6483 /* OImode shouldn't be used directly. */
6484 gcc_unreachable ();
6485 case CTImode:
6486 return 0;
6487 case SFmode:
6488 if (!(bit_offset % 64))
6489 classes[0] = X86_64_SSESF_CLASS;
6490 else
6491 classes[0] = X86_64_SSE_CLASS;
6492 return 1;
6493 case DFmode:
6494 classes[0] = X86_64_SSEDF_CLASS;
6495 return 1;
6496 case XFmode:
6497 classes[0] = X86_64_X87_CLASS;
6498 classes[1] = X86_64_X87UP_CLASS;
6499 return 2;
6500 case TFmode:
6501 classes[0] = X86_64_SSE_CLASS;
6502 classes[1] = X86_64_SSEUP_CLASS;
6503 return 2;
6504 case SCmode:
6505 classes[0] = X86_64_SSE_CLASS;
6506 if (!(bit_offset % 64))
6507 return 1;
6508 else
6509 {
6510 static bool warned;
6511
6512 if (!warned && warn_psabi)
6513 {
6514 warned = true;
6515 inform (input_location,
6516 "the ABI of passing structure with complex float"
6517 " member has changed in GCC 4.4");
6518 }
6519 classes[1] = X86_64_SSESF_CLASS;
6520 return 2;
6521 }
6522 case DCmode:
6523 classes[0] = X86_64_SSEDF_CLASS;
6524 classes[1] = X86_64_SSEDF_CLASS;
6525 return 2;
6526 case XCmode:
6527 classes[0] = X86_64_COMPLEX_X87_CLASS;
6528 return 1;
6529 case TCmode:
6530 	      /* This mode is larger than 16 bytes.  */
6531 return 0;
6532 case V8SFmode:
6533 case V8SImode:
6534 case V32QImode:
6535 case V16HImode:
6536 case V4DFmode:
6537 case V4DImode:
6538 classes[0] = X86_64_SSE_CLASS;
6539 classes[1] = X86_64_SSEUP_CLASS;
6540 classes[2] = X86_64_SSEUP_CLASS;
6541 classes[3] = X86_64_SSEUP_CLASS;
6542 return 4;
6543 case V4SFmode:
6544 case V4SImode:
6545 case V16QImode:
6546 case V8HImode:
6547 case V2DFmode:
6548 case V2DImode:
6549 classes[0] = X86_64_SSE_CLASS;
6550 classes[1] = X86_64_SSEUP_CLASS;
6551 return 2;
6552 case V1TImode:
6553 case V1DImode:
6554 case V2SFmode:
6555 case V2SImode:
6556 case V4HImode:
6557 case V8QImode:
6558 classes[0] = X86_64_SSE_CLASS;
6559 return 1;
6560 case BLKmode:
6561 case VOIDmode:
6562 return 0;
6563 default:
6564 gcc_assert (VECTOR_MODE_P (mode));
6565
6566 if (bytes > 16)
6567 return 0;
6568
6569 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6570
6571 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6572 classes[0] = X86_64_INTEGERSI_CLASS;
6573 else
6574 classes[0] = X86_64_INTEGER_CLASS;
6575 classes[1] = X86_64_INTEGER_CLASS;
6576 return 1 + (bytes > 8);
6577 }
6578 }
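
/* Worked example of the classification above (a sketch, following the x86-64
   psABI): the 16-byte aggregate

     struct s { double d; long l; };

   has its first eightbyte classified as X86_64_SSEDF_CLASS and its second as
   X86_64_INTEGER_CLASS, so it is passed in one SSE register and one
   general-purpose register rather than in memory.  */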
6579
6580 	/* Examine the argument and set the number of registers required in each
6581 	   class.  Return 0 iff the parameter should be passed in memory.  */
6582 static int
6583 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6584 int *int_nregs, int *sse_nregs)
6585 {
6586 enum x86_64_reg_class regclass[MAX_CLASSES];
6587 int n = classify_argument (mode, type, regclass, 0);
6588
6589 *int_nregs = 0;
6590 *sse_nregs = 0;
6591 if (!n)
6592 return 0;
6593 for (n--; n >= 0; n--)
6594 switch (regclass[n])
6595 {
6596 case X86_64_INTEGER_CLASS:
6597 case X86_64_INTEGERSI_CLASS:
6598 (*int_nregs)++;
6599 break;
6600 case X86_64_SSE_CLASS:
6601 case X86_64_SSESF_CLASS:
6602 case X86_64_SSEDF_CLASS:
6603 (*sse_nregs)++;
6604 break;
6605 case X86_64_NO_CLASS:
6606 case X86_64_SSEUP_CLASS:
6607 break;
6608 case X86_64_X87_CLASS:
6609 case X86_64_X87UP_CLASS:
6610 if (!in_return)
6611 return 0;
6612 break;
6613 case X86_64_COMPLEX_X87_CLASS:
6614 return in_return ? 2 : 0;
6615 case X86_64_MEMORY_CLASS:
6616 gcc_unreachable ();
6617 }
6618 return 1;
6619 }
6620
6621 /* Construct container for the argument used by GCC interface. See
6622 FUNCTION_ARG for the detailed description. */
6623
6624 static rtx
6625 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6626 const_tree type, int in_return, int nintregs, int nsseregs,
6627 const int *intreg, int sse_regno)
6628 {
6629 /* The following variables hold the static issued_error state. */
6630 static bool issued_sse_arg_error;
6631 static bool issued_sse_ret_error;
6632 static bool issued_x87_ret_error;
6633
6634 enum machine_mode tmpmode;
6635 int bytes =
6636 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6637 enum x86_64_reg_class regclass[MAX_CLASSES];
6638 int n;
6639 int i;
6640 int nexps = 0;
6641 int needed_sseregs, needed_intregs;
6642 rtx exp[MAX_CLASSES];
6643 rtx ret;
6644
6645 n = classify_argument (mode, type, regclass, 0);
6646 if (!n)
6647 return NULL;
6648 if (!examine_argument (mode, type, in_return, &needed_intregs,
6649 &needed_sseregs))
6650 return NULL;
6651 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6652 return NULL;
6653
6654 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6655 some less clueful developer tries to use floating-point anyway. */
6656 if (needed_sseregs && !TARGET_SSE)
6657 {
6658 if (in_return)
6659 {
6660 if (!issued_sse_ret_error)
6661 {
6662 error ("SSE register return with SSE disabled");
6663 issued_sse_ret_error = true;
6664 }
6665 }
6666 else if (!issued_sse_arg_error)
6667 {
6668 error ("SSE register argument with SSE disabled");
6669 issued_sse_arg_error = true;
6670 }
6671 return NULL;
6672 }
6673
6674 /* Likewise, error if the ABI requires us to return values in the
6675 x87 registers and the user specified -mno-80387. */
6676 if (!TARGET_80387 && in_return)
6677 for (i = 0; i < n; i++)
6678 if (regclass[i] == X86_64_X87_CLASS
6679 || regclass[i] == X86_64_X87UP_CLASS
6680 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6681 {
6682 if (!issued_x87_ret_error)
6683 {
6684 error ("x87 register return with x87 disabled");
6685 issued_x87_ret_error = true;
6686 }
6687 return NULL;
6688 }
6689
6690 /* First construct simple cases. Avoid SCmode, since we want to use
6691 single register to pass this type. */
6692 if (n == 1 && mode != SCmode)
6693 switch (regclass[0])
6694 {
6695 case X86_64_INTEGER_CLASS:
6696 case X86_64_INTEGERSI_CLASS:
6697 return gen_rtx_REG (mode, intreg[0]);
6698 case X86_64_SSE_CLASS:
6699 case X86_64_SSESF_CLASS:
6700 case X86_64_SSEDF_CLASS:
6701 if (mode != BLKmode)
6702 return gen_reg_or_parallel (mode, orig_mode,
6703 SSE_REGNO (sse_regno));
6704 break;
6705 case X86_64_X87_CLASS:
6706 case X86_64_COMPLEX_X87_CLASS:
6707 return gen_rtx_REG (mode, FIRST_STACK_REG);
6708 case X86_64_NO_CLASS:
6709 /* Zero sized array, struct or class. */
6710 return NULL;
6711 default:
6712 gcc_unreachable ();
6713 }
6714 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6715 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6716 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6717 if (n == 4
6718 && regclass[0] == X86_64_SSE_CLASS
6719 && regclass[1] == X86_64_SSEUP_CLASS
6720 && regclass[2] == X86_64_SSEUP_CLASS
6721 && regclass[3] == X86_64_SSEUP_CLASS
6722 && mode != BLKmode)
6723 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6724
6725 if (n == 2
6726 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6727 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6728 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6729 && regclass[1] == X86_64_INTEGER_CLASS
6730 && (mode == CDImode || mode == TImode || mode == TFmode)
6731 && intreg[0] + 1 == intreg[1])
6732 return gen_rtx_REG (mode, intreg[0]);
6733
6734 /* Otherwise figure out the entries of the PARALLEL. */
6735 for (i = 0; i < n; i++)
6736 {
6737 int pos;
6738
6739 switch (regclass[i])
6740 {
6741 case X86_64_NO_CLASS:
6742 break;
6743 case X86_64_INTEGER_CLASS:
6744 case X86_64_INTEGERSI_CLASS:
6745 /* Merge TImodes on aligned occasions here too. */
6746 if (i * 8 + 8 > bytes)
6747 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6748 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6749 tmpmode = SImode;
6750 else
6751 tmpmode = DImode;
6752 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6753 if (tmpmode == BLKmode)
6754 tmpmode = DImode;
6755 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6756 gen_rtx_REG (tmpmode, *intreg),
6757 GEN_INT (i*8));
6758 intreg++;
6759 break;
6760 case X86_64_SSESF_CLASS:
6761 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6762 gen_rtx_REG (SFmode,
6763 SSE_REGNO (sse_regno)),
6764 GEN_INT (i*8));
6765 sse_regno++;
6766 break;
6767 case X86_64_SSEDF_CLASS:
6768 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6769 gen_rtx_REG (DFmode,
6770 SSE_REGNO (sse_regno)),
6771 GEN_INT (i*8));
6772 sse_regno++;
6773 break;
6774 case X86_64_SSE_CLASS:
6775 pos = i;
6776 switch (n)
6777 {
6778 case 1:
6779 tmpmode = DImode;
6780 break;
6781 case 2:
6782 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6783 {
6784 tmpmode = TImode;
6785 i++;
6786 }
6787 else
6788 tmpmode = DImode;
6789 break;
6790 case 4:
6791 gcc_assert (i == 0
6792 && regclass[1] == X86_64_SSEUP_CLASS
6793 && regclass[2] == X86_64_SSEUP_CLASS
6794 && regclass[3] == X86_64_SSEUP_CLASS);
6795 tmpmode = OImode;
6796 i += 3;
6797 break;
6798 default:
6799 gcc_unreachable ();
6800 }
6801 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6802 gen_rtx_REG (tmpmode,
6803 SSE_REGNO (sse_regno)),
6804 GEN_INT (pos*8));
6805 sse_regno++;
6806 break;
6807 default:
6808 gcc_unreachable ();
6809 }
6810 }
6811
6812 /* Empty aligned struct, union or class. */
6813 if (nexps == 0)
6814 return NULL;
6815
6816 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6817 for (i = 0; i < nexps; i++)
6818 XVECEXP (ret, 0, i) = exp [i];
6819 return ret;
6820 }
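
/* Illustration (a sketch, not exhaustive): under the 64-bit SysV ABI a
   type such as

       struct di_pair { double d; long l; };

   classifies as { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS }, so the
   loop above builds a PARALLEL along the lines of

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI rax) (const_int 8))])

   where the concrete register numbers depend on the INTREG and
   SSE_REGNO arguments supplied by the caller.  */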
6821
6822 /* Update the data in CUM to advance over an argument of mode MODE
6823 and data type TYPE. (TYPE is null for libcalls where that information
6824 may not be available.) */
6825
6826 static void
6827 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6828 const_tree type, HOST_WIDE_INT bytes,
6829 HOST_WIDE_INT words)
6830 {
6831 switch (mode)
6832 {
6833 default:
6834 break;
6835
6836 case BLKmode:
6837 if (bytes < 0)
6838 break;
6839 /* FALLTHRU */
6840
6841 case DImode:
6842 case SImode:
6843 case HImode:
6844 case QImode:
6845 cum->words += words;
6846 cum->nregs -= words;
6847 cum->regno += words;
6848
6849 if (cum->nregs <= 0)
6850 {
6851 cum->nregs = 0;
6852 cum->regno = 0;
6853 }
6854 break;
6855
6856 case OImode:
6857 /* OImode shouldn't be used directly. */
6858 gcc_unreachable ();
6859
6860 case DFmode:
6861 if (cum->float_in_sse < 2)
6862 break;
6863 case SFmode:
6864 if (cum->float_in_sse < 1)
6865 break;
6866 /* FALLTHRU */
6867
6868 case V8SFmode:
6869 case V8SImode:
6870 case V32QImode:
6871 case V16HImode:
6872 case V4DFmode:
6873 case V4DImode:
6874 case TImode:
6875 case V16QImode:
6876 case V8HImode:
6877 case V4SImode:
6878 case V2DImode:
6879 case V4SFmode:
6880 case V2DFmode:
6881 if (!type || !AGGREGATE_TYPE_P (type))
6882 {
6883 cum->sse_words += words;
6884 cum->sse_nregs -= 1;
6885 cum->sse_regno += 1;
6886 if (cum->sse_nregs <= 0)
6887 {
6888 cum->sse_nregs = 0;
6889 cum->sse_regno = 0;
6890 }
6891 }
6892 break;
6893
6894 case V8QImode:
6895 case V4HImode:
6896 case V2SImode:
6897 case V2SFmode:
6898 case V1TImode:
6899 case V1DImode:
6900 if (!type || !AGGREGATE_TYPE_P (type))
6901 {
6902 cum->mmx_words += words;
6903 cum->mmx_nregs -= 1;
6904 cum->mmx_regno += 1;
6905 if (cum->mmx_nregs <= 0)
6906 {
6907 cum->mmx_nregs = 0;
6908 cum->mmx_regno = 0;
6909 }
6910 }
6911 break;
6912 }
6913 }
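
/* Example for the integer case above: with -mregparm=3 a call such as
   f (int a, int b) advances CUM twice through the QI/HI/SI/DImode arm;
   each int is one word, so nregs goes 3 -> 2 -> 1 and regno 0 -> 1 -> 2,
   and only once nregs reaches zero do further words go on the stack.  */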
6914
6915 static void
6916 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6917 const_tree type, HOST_WIDE_INT words, bool named)
6918 {
6919 int int_nregs, sse_nregs;
6920
6921 /* Unnamed 256bit vector mode parameters are passed on stack. */
6922 if (!named && VALID_AVX256_REG_MODE (mode))
6923 return;
6924
6925 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6926 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6927 {
6928 cum->nregs -= int_nregs;
6929 cum->sse_nregs -= sse_nregs;
6930 cum->regno += int_nregs;
6931 cum->sse_regno += sse_nregs;
6932 }
6933 else
6934 {
6935 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6936 cum->words = (cum->words + align - 1) & ~(align - 1);
6937 cum->words += words;
6938 }
6939 }
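
/* Example: on the 64-bit SysV ABI a named double consumes one SSE
   register above (sse_nregs decreases, sse_regno increases), whereas an
   unnamed 256-bit vector argument takes the early return, leaving all
   register counters untouched because the value lives on the stack.  */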
6940
6941 static void
6942 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6943 HOST_WIDE_INT words)
6944 {
6945 /* Otherwise, this should be passed indirectly. */
6946 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6947
6948 cum->words += words;
6949 if (cum->nregs > 0)
6950 {
6951 cum->nregs -= 1;
6952 cum->regno += 1;
6953 }
6954 }
6955
6956 /* Update the data in CUM to advance over an argument of mode MODE and
6957 data type TYPE. (TYPE is null for libcalls where that information
6958 may not be available.) */
6959
6960 static void
6961 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6962 const_tree type, bool named)
6963 {
6964 HOST_WIDE_INT bytes, words;
6965
6966 if (mode == BLKmode)
6967 bytes = int_size_in_bytes (type);
6968 else
6969 bytes = GET_MODE_SIZE (mode);
6970 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6971
6972 if (type)
6973 mode = type_natural_mode (type, NULL);
6974
6975 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6976 function_arg_advance_ms_64 (cum, bytes, words);
6977 else if (TARGET_64BIT)
6978 function_arg_advance_64 (cum, mode, type, words, named);
6979 else
6980 function_arg_advance_32 (cum, mode, type, bytes, words);
6981 }
6982
6983 /* Define where to put the arguments to a function.
6984 Value is zero to push the argument on the stack,
6985 or a hard register in which to store the argument.
6986
6987 MODE is the argument's machine mode.
6988 TYPE is the data type of the argument (as a tree).
6989 This is null for libcalls where that information may
6990 not be available.
6991 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6992 the preceding args and about the function being called.
6993 NAMED is nonzero if this argument is a named parameter
6994 (otherwise it is an extra parameter matching an ellipsis). */
6995
6996 static rtx
6997 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6998 enum machine_mode orig_mode, const_tree type,
6999 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7000 {
7001 static bool warnedsse, warnedmmx;
7002
7003 /* Avoid the AL settings for the Unix64 ABI. */
7004 if (mode == VOIDmode)
7005 return constm1_rtx;
7006
7007 switch (mode)
7008 {
7009 default:
7010 break;
7011
7012 case BLKmode:
7013 if (bytes < 0)
7014 break;
7015 /* FALLTHRU */
7016 case DImode:
7017 case SImode:
7018 case HImode:
7019 case QImode:
7020 if (words <= cum->nregs)
7021 {
7022 int regno = cum->regno;
7023
7024 /* Fastcall allocates the first two DWORD-sized (SImode) or
7025 smaller arguments to ECX and EDX if the argument isn't an
7026 aggregate type. */
7027 if (cum->fastcall)
7028 {
7029 if (mode == BLKmode
7030 || mode == DImode
7031 || (type && AGGREGATE_TYPE_P (type)))
7032 break;
7033
7034 /* ECX, not EAX, is the first allocated register. */
7035 if (regno == AX_REG)
7036 regno = CX_REG;
7037 }
7038 return gen_rtx_REG (mode, regno);
7039 }
7040 break;
7041
7042 case DFmode:
7043 if (cum->float_in_sse < 2)
7044 break;
7045 case SFmode:
7046 if (cum->float_in_sse < 1)
7047 break;
7048 /* FALLTHRU */
7049 case TImode:
7050 /* In 32bit, we pass TImode in xmm registers. */
7051 case V16QImode:
7052 case V8HImode:
7053 case V4SImode:
7054 case V2DImode:
7055 case V4SFmode:
7056 case V2DFmode:
7057 if (!type || !AGGREGATE_TYPE_P (type))
7058 {
7059 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
7060 {
7061 warnedsse = true;
7062 warning (0, "SSE vector argument without SSE enabled "
7063 "changes the ABI");
7064 }
7065 if (cum->sse_nregs)
7066 return gen_reg_or_parallel (mode, orig_mode,
7067 cum->sse_regno + FIRST_SSE_REG);
7068 }
7069 break;
7070
7071 case OImode:
7072 /* OImode shouldn't be used directly. */
7073 gcc_unreachable ();
7074
7075 case V8SFmode:
7076 case V8SImode:
7077 case V32QImode:
7078 case V16HImode:
7079 case V4DFmode:
7080 case V4DImode:
7081 if (!type || !AGGREGATE_TYPE_P (type))
7082 {
7083 if (cum->sse_nregs)
7084 return gen_reg_or_parallel (mode, orig_mode,
7085 cum->sse_regno + FIRST_SSE_REG);
7086 }
7087 break;
7088
7089 case V8QImode:
7090 case V4HImode:
7091 case V2SImode:
7092 case V2SFmode:
7093 case V1TImode:
7094 case V1DImode:
7095 if (!type || !AGGREGATE_TYPE_P (type))
7096 {
7097 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
7098 {
7099 warnedmmx = true;
7100 warning (0, "MMX vector argument without MMX enabled "
7101 "changes the ABI");
7102 }
7103 if (cum->mmx_nregs)
7104 return gen_reg_or_parallel (mode, orig_mode,
7105 cum->mmx_regno + FIRST_MMX_REG);
7106 }
7107 break;
7108 }
7109
7110 return NULL_RTX;
7111 }
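
/* Example for the fastcall path above: a declaration such as

       __attribute__((fastcall)) void f (int a, int b, int c);

   places A in %ecx and B in %edx, while C (and any aggregate or
   DImode argument) falls through to the stack.  */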
7112
7113 static rtx
7114 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7115 enum machine_mode orig_mode, const_tree type, bool named)
7116 {
7117 /* Handle a hidden AL argument containing number of registers
7118 for varargs x86-64 functions. */
7119 if (mode == VOIDmode)
7120 return GEN_INT (cum->maybe_vaarg
7121 ? (cum->sse_nregs < 0
7122 ? X86_64_SSE_REGPARM_MAX
7123 : cum->sse_regno)
7124 : -1);
7125
7126 switch (mode)
7127 {
7128 default:
7129 break;
7130
7131 case V8SFmode:
7132 case V8SImode:
7133 case V32QImode:
7134 case V16HImode:
7135 case V4DFmode:
7136 case V4DImode:
7137 /* Unnamed 256bit vector mode parameters are passed on stack. */
7138 if (!named)
7139 return NULL;
7140 break;
7141 }
7142
7143 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7144 cum->sse_nregs,
7145 &x86_64_int_parameter_registers [cum->regno],
7146 cum->sse_regno);
7147 }
7148
7149 static rtx
7150 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7151 enum machine_mode orig_mode, bool named,
7152 HOST_WIDE_INT bytes)
7153 {
7154 unsigned int regno;
7155
7156 /* We need to add a clobber for MS_ABI -> SYSV ABI calls in expand_call.
7157 We use a value of -2 to specify that the current function call is MS ABI. */
7158 if (mode == VOIDmode)
7159 return GEN_INT (-2);
7160
7161 /* If we've run out of registers, it goes on the stack. */
7162 if (cum->nregs == 0)
7163 return NULL_RTX;
7164
7165 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7166
7167 /* Only floating point modes are passed in anything but integer regs. */
7168 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7169 {
7170 if (named)
7171 regno = cum->regno + FIRST_SSE_REG;
7172 else
7173 {
7174 rtx t1, t2;
7175
7176 /* Unnamed floating parameters are passed in both the
7177 SSE and integer registers. */
7178 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7179 t2 = gen_rtx_REG (mode, regno);
7180 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7181 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7182 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7183 }
7184 }
7185 /* Handle aggregate types passed in a register. */
7186 if (orig_mode == BLKmode)
7187 {
7188 if (bytes > 0 && bytes <= 8)
7189 mode = (bytes > 4 ? DImode : SImode);
7190 if (mode == BLKmode)
7191 mode = DImode;
7192 }
7193
7194 return gen_reg_or_parallel (mode, orig_mode, regno);
7195 }
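
/* Example for the MS ABI handling above: the first four argument slots
   map to %rcx, %rdx, %r8 and %r9; a named double in the second slot is
   returned here as %xmm1 (slot index + FIRST_SSE_REG), while an unnamed
   double yields the two-element PARALLEL so the value is available in
   both the SSE and the integer register for va_arg callers.  */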
7196
7197 /* Return where to put the arguments to a function.
7198 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7199
7200 MODE is the argument's machine mode. TYPE is the data type of the
7201 argument. It is null for libcalls where that information may not be
7202 available. CUM gives information about the preceding args and about
7203 the function being called. NAMED is nonzero if this argument is a
7204 named parameter (otherwise it is an extra parameter matching an
7205 ellipsis). */
7206
7207 static rtx
7208 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
7209 const_tree type, bool named)
7210 {
7211 enum machine_mode mode = omode;
7212 HOST_WIDE_INT bytes, words;
7213 rtx arg;
7214
7215 if (mode == BLKmode)
7216 bytes = int_size_in_bytes (type);
7217 else
7218 bytes = GET_MODE_SIZE (mode);
7219 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7220
7221 /* To simplify the code below, represent vector types with a vector mode
7222 even if MMX/SSE are not active. */
7223 if (type && TREE_CODE (type) == VECTOR_TYPE)
7224 mode = type_natural_mode (type, cum);
7225
7226 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7227 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7228 else if (TARGET_64BIT)
7229 arg = function_arg_64 (cum, mode, omode, type, named);
7230 else
7231 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7232
7233 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
7234 {
7235 /* This argument uses 256bit AVX modes. */
7236 if (cum->caller)
7237 cfun->machine->callee_pass_avx256_p = true;
7238 else
7239 cfun->machine->caller_pass_avx256_p = true;
7240 }
7241
7242 return arg;
7243 }
7244
7245 /* A C expression that indicates when an argument must be passed by
7246 reference. If nonzero for an argument, a copy of that argument is
7247 made in memory and a pointer to the argument is passed instead of
7248 the argument itself. The pointer is passed in whatever way is
7249 appropriate for passing a pointer to that type. */
7250
7251 static bool
7252 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
7253 enum machine_mode mode ATTRIBUTE_UNUSED,
7254 const_tree type, bool named ATTRIBUTE_UNUSED)
7255 {
7256 /* See Windows x64 Software Convention. */
7257 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7258 {
7259 int msize = (int) GET_MODE_SIZE (mode);
7260 if (type)
7261 {
7262 /* Arrays are passed by reference. */
7263 if (TREE_CODE (type) == ARRAY_TYPE)
7264 return true;
7265
7266 if (AGGREGATE_TYPE_P (type))
7267 {
7268 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7269 are passed by reference. */
7270 msize = int_size_in_bytes (type);
7271 }
7272 }
7273
7274 /* __m128 is passed by reference. */
7275 switch (msize) {
7276 case 1: case 2: case 4: case 8:
7277 break;
7278 default:
7279 return true;
7280 }
7281 }
7282 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7283 return true;
7284 
7285 return false;
7286 }
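
/* Example: under the MS ABI a 12-byte struct or a __m128 argument is
   passed by reference because its size is not 1, 2, 4 or 8 bytes,
   whereas an 8-byte struct travels by value in a register.  On the
   64-bit SysV side only variable-sized types (int_size_in_bytes == -1)
   take the reference path here.  */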
7287
7288 /* Return true when TYPE should be 128bit aligned for 32bit argument
7289 passing ABI. XXX: This function is obsolete and is only used for
7290 checking psABI compatibility with previous versions of GCC. */
7291
7292 static bool
7293 ix86_compat_aligned_value_p (const_tree type)
7294 {
7295 enum machine_mode mode = TYPE_MODE (type);
7296 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7297 || mode == TDmode
7298 || mode == TFmode
7299 || mode == TCmode)
7300 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7301 return true;
7302 if (TYPE_ALIGN (type) < 128)
7303 return false;
7304
7305 if (AGGREGATE_TYPE_P (type))
7306 {
7307 /* Walk the aggregates recursively. */
7308 switch (TREE_CODE (type))
7309 {
7310 case RECORD_TYPE:
7311 case UNION_TYPE:
7312 case QUAL_UNION_TYPE:
7313 {
7314 tree field;
7315
7316 /* Walk all the structure fields. */
7317 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7318 {
7319 if (TREE_CODE (field) == FIELD_DECL
7320 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7321 return true;
7322 }
7323 break;
7324 }
7325
7326 case ARRAY_TYPE:
7327 /* Just for use if some languages pass arrays by value. */
7328 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7329 return true;
7330 break;
7331
7332 default:
7333 gcc_unreachable ();
7334 }
7335 }
7336 return false;
7337 }
7338
7339 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7340 XXX: This function is obsolete and is only used for checking psABI
7341 compatibility with previous versions of GCC. */
7342
7343 static unsigned int
7344 ix86_compat_function_arg_boundary (enum machine_mode mode,
7345 const_tree type, unsigned int align)
7346 {
7347 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7348 natural boundaries. */
7349 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7350 {
7351 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7352 make an exception for SSE modes since these require 128bit
7353 alignment.
7354
7355 The handling here differs from field_alignment. ICC aligns MMX
7356 arguments to 4 byte boundaries, while structure fields are aligned
7357 to 8 byte boundaries. */
7358 if (!type)
7359 {
7360 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7361 align = PARM_BOUNDARY;
7362 }
7363 else
7364 {
7365 if (!ix86_compat_aligned_value_p (type))
7366 align = PARM_BOUNDARY;
7367 }
7368 }
7369 if (align > BIGGEST_ALIGNMENT)
7370 align = BIGGEST_ALIGNMENT;
7371 return align;
7372 }
7373
7374 /* Return true when TYPE should be 128bit aligned for 32bit argument
7375 passing ABI. */
7376
7377 static bool
7378 ix86_contains_aligned_value_p (const_tree type)
7379 {
7380 enum machine_mode mode = TYPE_MODE (type);
7381
7382 if (mode == XFmode || mode == XCmode)
7383 return false;
7384
7385 if (TYPE_ALIGN (type) < 128)
7386 return false;
7387
7388 if (AGGREGATE_TYPE_P (type))
7389 {
7390 /* Walk the aggregates recursively. */
7391 switch (TREE_CODE (type))
7392 {
7393 case RECORD_TYPE:
7394 case UNION_TYPE:
7395 case QUAL_UNION_TYPE:
7396 {
7397 tree field;
7398
7399 /* Walk all the structure fields. */
7400 for (field = TYPE_FIELDS (type);
7401 field;
7402 field = DECL_CHAIN (field))
7403 {
7404 if (TREE_CODE (field) == FIELD_DECL
7405 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7406 return true;
7407 }
7408 break;
7409 }
7410
7411 case ARRAY_TYPE:
7412 /* Just for use if some languages pass arrays by value. */
7413 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7414 return true;
7415 break;
7416
7417 default:
7418 gcc_unreachable ();
7419 }
7420 }
7421 else
7422 return TYPE_ALIGN (type) >= 128;
7423
7424 return false;
7425 }
7426
7427 /* Gives the alignment boundary, in bits, of an argument with the
7428 specified mode and type. */
7429
7430 static unsigned int
7431 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7432 {
7433 unsigned int align;
7434 if (type)
7435 {
7436 /* Since the main variant type is used for the call, convert TYPE
7437 to its main variant. */
7438 type = TYPE_MAIN_VARIANT (type);
7439 align = TYPE_ALIGN (type);
7440 }
7441 else
7442 align = GET_MODE_ALIGNMENT (mode);
7443 if (align < PARM_BOUNDARY)
7444 align = PARM_BOUNDARY;
7445 else
7446 {
7447 static bool warned;
7448 unsigned int saved_align = align;
7449
7450 if (!TARGET_64BIT)
7451 {
7452 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7453 if (!type)
7454 {
7455 if (mode == XFmode || mode == XCmode)
7456 align = PARM_BOUNDARY;
7457 }
7458 else if (!ix86_contains_aligned_value_p (type))
7459 align = PARM_BOUNDARY;
7460
7461 if (align < 128)
7462 align = PARM_BOUNDARY;
7463 }
7464
7465 if (warn_psabi
7466 && !warned
7467 && align != ix86_compat_function_arg_boundary (mode, type,
7468 saved_align))
7469 {
7470 warned = true;
7471 inform (input_location,
7472 "The ABI for passing parameters with %d-byte"
7473 " alignment has changed in GCC 4.6",
7474 align / BITS_PER_UNIT);
7475 }
7476 }
7477
7478 return align;
7479 }
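
/* Example for the 32-bit logic above: a plain double argument ends up
   with PARM_BOUNDARY (32-bit) alignment, while a __m128 argument keeps
   its natural 128-bit alignment; the inform call fires at most once,
   when the result differs from what the pre-4.6 compatibility routine
   above would have produced.  */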
7480
7481 /* Return true if N is a possible register number of function value. */
7482
7483 static bool
7484 ix86_function_value_regno_p (const unsigned int regno)
7485 {
7486 switch (regno)
7487 {
7488 case 0:
7489 return true;
7490
7491 case FIRST_FLOAT_REG:
7492 /* TODO: The function should depend on the current function's ABI,
7493 but builtins.c would then need updating. Therefore we use the
7494 default ABI. */
7495 if (TARGET_64BIT && ix86_abi == MS_ABI)
7496 return false;
7497 return TARGET_FLOAT_RETURNS_IN_80387;
7498
7499 case FIRST_SSE_REG:
7500 return TARGET_SSE;
7501
7502 case FIRST_MMX_REG:
7503 if (TARGET_MACHO || TARGET_64BIT)
7504 return false;
7505 return TARGET_MMX;
7506 }
7507
7508 return false;
7509 }
7510
7511 /* Define how to find the value returned by a function.
7512 VALTYPE is the data type of the value (as a tree).
7513 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7514 otherwise, FUNC is 0. */
7515
7516 static rtx
7517 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7518 const_tree fntype, const_tree fn)
7519 {
7520 unsigned int regno;
7521
7522 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7523 we normally prevent this case when mmx is not available. However
7524 some ABIs may require the result to be returned like DImode. */
7525 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7526 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7527
7528 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7529 we prevent this case when sse is not available. However some ABIs
7530 may require the result to be returned like integer TImode. */
7531 else if (mode == TImode
7532 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7533 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7534
7535 /* 32-byte vector modes in %ymm0. */
7536 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7537 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7538
7539 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7540 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7541 regno = FIRST_FLOAT_REG;
7542 else
7543 /* Most things go in %eax. */
7544 regno = AX_REG;
7545
7546 /* Override FP return register with %xmm0 for local functions when
7547 SSE math is enabled or for functions with sseregparm attribute. */
7548 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7549 {
7550 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7551 if ((sse_level >= 1 && mode == SFmode)
7552 || (sse_level == 2 && mode == DFmode))
7553 regno = FIRST_SSE_REG;
7554 }
7555
7556 /* OImode shouldn't be used directly. */
7557 gcc_assert (mode != OImode);
7558
7559 return gen_rtx_REG (orig_mode, regno);
7560 }
7561
7562 static rtx
7563 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7564 const_tree valtype)
7565 {
7566 rtx ret;
7567
7568 /* Handle libcalls, which don't provide a type node. */
7569 if (valtype == NULL)
7570 {
7571 switch (mode)
7572 {
7573 case SFmode:
7574 case SCmode:
7575 case DFmode:
7576 case DCmode:
7577 case TFmode:
7578 case SDmode:
7579 case DDmode:
7580 case TDmode:
7581 return gen_rtx_REG (mode, FIRST_SSE_REG);
7582 case XFmode:
7583 case XCmode:
7584 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7585 case TCmode:
7586 return NULL;
7587 default:
7588 return gen_rtx_REG (mode, AX_REG);
7589 }
7590 }
7591
7592 ret = construct_container (mode, orig_mode, valtype, 1,
7593 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7594 x86_64_int_return_registers, 0);
7595
7596 /* For zero-sized structures, construct_container returns NULL, but we
7597 need to keep the rest of the compiler happy by returning a meaningful value. */
7598 if (!ret)
7599 ret = gen_rtx_REG (orig_mode, AX_REG);
7600
7601 return ret;
7602 }
7603
7604 static rtx
7605 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7606 {
7607 unsigned int regno = AX_REG;
7608
7609 if (TARGET_SSE)
7610 {
7611 switch (GET_MODE_SIZE (mode))
7612 {
7613 case 16:
7614 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7615 && !COMPLEX_MODE_P (mode))
7616 regno = FIRST_SSE_REG;
7617 break;
7618 case 8:
7619 case 4:
7620 if (mode == SFmode || mode == DFmode)
7621 regno = FIRST_SSE_REG;
7622 break;
7623 default:
7624 break;
7625 }
7626 }
7627 return gen_rtx_REG (orig_mode, regno);
7628 }
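
/* Example: with SSE enabled the MS ABI returns float and double in
   %xmm0, and 16-byte non-complex integer or vector values (e.g. __m128)
   in %xmm0 as well; everything else, including small aggregates, comes
   back in %rax via the default AX_REG choice above.  */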
7629
7630 static rtx
7631 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7632 enum machine_mode orig_mode, enum machine_mode mode)
7633 {
7634 const_tree fn, fntype;
7635
7636 fn = NULL_TREE;
7637 if (fntype_or_decl && DECL_P (fntype_or_decl))
7638 fn = fntype_or_decl;
7639 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7640
7641 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7642 return function_value_ms_64 (orig_mode, mode);
7643 else if (TARGET_64BIT)
7644 return function_value_64 (orig_mode, mode, valtype);
7645 else
7646 return function_value_32 (orig_mode, mode, fntype, fn);
7647 }
7648
7649 static rtx
7650 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7651 bool outgoing ATTRIBUTE_UNUSED)
7652 {
7653 enum machine_mode mode, orig_mode;
7654
7655 orig_mode = TYPE_MODE (valtype);
7656 mode = type_natural_mode (valtype, NULL);
7657 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7658 }
7659
7660 rtx
7661 ix86_libcall_value (enum machine_mode mode)
7662 {
7663 return ix86_function_value_1 (NULL, NULL, mode, mode);
7664 }
7665
7666 /* Return true iff type is returned in memory. */
7667
7668 static bool ATTRIBUTE_UNUSED
7669 return_in_memory_32 (const_tree type, enum machine_mode mode)
7670 {
7671 HOST_WIDE_INT size;
7672
7673 if (mode == BLKmode)
7674 return true;
7675
7676 size = int_size_in_bytes (type);
7677
7678 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7679 return false;
7680
7681 if (VECTOR_MODE_P (mode) || mode == TImode)
7682 {
7683 /* User-created vectors small enough to fit in EAX. */
7684 if (size < 8)
7685 return false;
7686
7687 /* MMX/3dNow values are returned in MM0,
7688 except when it doesn't exist or the ABI prescribes otherwise. */
7689 if (size == 8)
7690 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7691
7692 /* SSE values are returned in XMM0, except when it doesn't exist. */
7693 if (size == 16)
7694 return !TARGET_SSE;
7695
7696 /* AVX values are returned in YMM0, except when it doesn't exist. */
7697 if (size == 32)
7698 return !TARGET_AVX;
7699 }
7700
7701 if (mode == XFmode)
7702 return false;
7703
7704 if (size > 12)
7705 return true;
7706
7707 /* OImode shouldn't be used directly. */
7708 gcc_assert (mode != OImode);
7709
7710 return false;
7711 }
7712
7713 static bool ATTRIBUTE_UNUSED
7714 return_in_memory_64 (const_tree type, enum machine_mode mode)
7715 {
7716 int needed_intregs, needed_sseregs;
7717 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7718 }
7719
7720 static bool ATTRIBUTE_UNUSED
7721 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7722 {
7723 HOST_WIDE_INT size = int_size_in_bytes (type);
7724
7725 /* __m128 is returned in xmm0. */
7726 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7727 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7728 return false;
7729
7730 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7731 return size != 1 && size != 2 && size != 4 && size != 8;
7732 }
7733
7734 static bool
7735 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7736 {
7737 #ifdef SUBTARGET_RETURN_IN_MEMORY
7738 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7739 #else
7740 const enum machine_mode mode = type_natural_mode (type, NULL);
7741
7742 if (TARGET_64BIT)
7743 {
7744 if (ix86_function_type_abi (fntype) == MS_ABI)
7745 return return_in_memory_ms_64 (type, mode);
7746 else
7747 return return_in_memory_64 (type, mode);
7748 }
7749 else
7750 return return_in_memory_32 (type, mode);
7751 #endif
7752 }
7753
7754 /* When returning SSE vector types, we have a choice of either
7755 (1) being ABI incompatible with a -march switch, or
7756 (2) generating an error.
7757 Given no good solution, I think the safest thing is one warning.
7758 The user won't be able to use -Werror, but....
7759
7760 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7761 called in response to actually generating a caller or callee that
7762 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7763 via aggregate_value_p for general type probing from tree-ssa. */
7764
7765 static rtx
7766 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7767 {
7768 static bool warnedsse, warnedmmx;
7769
7770 if (!TARGET_64BIT && type)
7771 {
7772 /* Look at the return type of the function, not the function type. */
7773 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7774
7775 if (!TARGET_SSE && !warnedsse)
7776 {
7777 if (mode == TImode
7778 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7779 {
7780 warnedsse = true;
7781 warning (0, "SSE vector return without SSE enabled "
7782 "changes the ABI");
7783 }
7784 }
7785
7786 if (!TARGET_MMX && !warnedmmx)
7787 {
7788 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7789 {
7790 warnedmmx = true;
7791 warning (0, "MMX vector return without MMX enabled "
7792 "changes the ABI");
7793 }
7794 }
7795 }
7796
7797 return NULL;
7798 }
7799
7800 \f
7801 /* Create the va_list data type. */
7802
7803 /* Returns the calling-convention-specific va_list data type.
7804 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7805
7806 static tree
7807 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7808 {
7809 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7810
7811 /* For i386 we use plain pointer to argument area. */
7812 if (!TARGET_64BIT || abi == MS_ABI)
7813 return build_pointer_type (char_type_node);
7814
7815 record = lang_hooks.types.make_type (RECORD_TYPE);
7816 type_decl = build_decl (BUILTINS_LOCATION,
7817 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7818
7819 f_gpr = build_decl (BUILTINS_LOCATION,
7820 FIELD_DECL, get_identifier ("gp_offset"),
7821 unsigned_type_node);
7822 f_fpr = build_decl (BUILTINS_LOCATION,
7823 FIELD_DECL, get_identifier ("fp_offset"),
7824 unsigned_type_node);
7825 f_ovf = build_decl (BUILTINS_LOCATION,
7826 FIELD_DECL, get_identifier ("overflow_arg_area"),
7827 ptr_type_node);
7828 f_sav = build_decl (BUILTINS_LOCATION,
7829 FIELD_DECL, get_identifier ("reg_save_area"),
7830 ptr_type_node);
7831
7832 va_list_gpr_counter_field = f_gpr;
7833 va_list_fpr_counter_field = f_fpr;
7834
7835 DECL_FIELD_CONTEXT (f_gpr) = record;
7836 DECL_FIELD_CONTEXT (f_fpr) = record;
7837 DECL_FIELD_CONTEXT (f_ovf) = record;
7838 DECL_FIELD_CONTEXT (f_sav) = record;
7839
7840 TYPE_STUB_DECL (record) = type_decl;
7841 TYPE_NAME (record) = type_decl;
7842 TYPE_FIELDS (record) = f_gpr;
7843 DECL_CHAIN (f_gpr) = f_fpr;
7844 DECL_CHAIN (f_fpr) = f_ovf;
7845 DECL_CHAIN (f_ovf) = f_sav;
7846
7847 layout_type (record);
7848
7849 /* The correct type is an array type of one element. */
7850 return build_array_type (record, build_index_type (size_zero_node));
7851 }
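
/* For reference, the record built above matches the SysV x86-64 va_list
   layout, roughly equivalent to the C declaration

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];

   gp_offset and fp_offset index into reg_save_area, while
   overflow_arg_area walks the arguments passed on the stack.  */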
7852
7853 /* Set up the builtin va_list data type and, for 64-bit, the additional
7854 calling-convention-specific va_list data types. */
7855
7856 static tree
7857 ix86_build_builtin_va_list (void)
7858 {
7859 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7860
7861 /* Initialize abi specific va_list builtin types. */
7862 if (TARGET_64BIT)
7863 {
7864 tree t;
7865 if (ix86_abi == MS_ABI)
7866 {
7867 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7868 if (TREE_CODE (t) != RECORD_TYPE)
7869 t = build_variant_type_copy (t);
7870 sysv_va_list_type_node = t;
7871 }
7872 else
7873 {
7874 t = ret;
7875 if (TREE_CODE (t) != RECORD_TYPE)
7876 t = build_variant_type_copy (t);
7877 sysv_va_list_type_node = t;
7878 }
7879 if (ix86_abi != MS_ABI)
7880 {
7881 t = ix86_build_builtin_va_list_abi (MS_ABI);
7882 if (TREE_CODE (t) != RECORD_TYPE)
7883 t = build_variant_type_copy (t);
7884 ms_va_list_type_node = t;
7885 }
7886 else
7887 {
7888 t = ret;
7889 if (TREE_CODE (t) != RECORD_TYPE)
7890 t = build_variant_type_copy (t);
7891 ms_va_list_type_node = t;
7892 }
7893 }
7894
7895 return ret;
7896 }
7897
7898 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7899
7900 static void
7901 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7902 {
7903 rtx save_area, mem;
7904 alias_set_type set;
7905 int i, max;
7906
7907 /* GPR size of varargs save area. */
7908 if (cfun->va_list_gpr_size)
7909 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7910 else
7911 ix86_varargs_gpr_size = 0;
7912
7913 /* FPR size of varargs save area. We don't need it if we don't pass
7914 anything in SSE registers. */
7915 if (TARGET_SSE && cfun->va_list_fpr_size)
7916 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7917 else
7918 ix86_varargs_fpr_size = 0;
7919
7920 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7921 return;
7922
7923 save_area = frame_pointer_rtx;
7924 set = get_varargs_alias_set ();
7925
7926 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7927 if (max > X86_64_REGPARM_MAX)
7928 max = X86_64_REGPARM_MAX;
7929
7930 for (i = cum->regno; i < max; i++)
7931 {
7932 mem = gen_rtx_MEM (Pmode,
7933 plus_constant (save_area, i * UNITS_PER_WORD));
7934 MEM_NOTRAP_P (mem) = 1;
7935 set_mem_alias_set (mem, set);
7936 emit_move_insn (mem, gen_rtx_REG (Pmode,
7937 x86_64_int_parameter_registers[i]));
7938 }
7939
7940 if (ix86_varargs_fpr_size)
7941 {
7942 enum machine_mode smode;
7943 rtx label, test;
7944
7945 /* Now emit code to save SSE registers. The AX parameter contains the
7946 number of SSE parameter registers used to call this function, though
7947 all we actually check here is the zero/non-zero status. */
7948
7949 label = gen_label_rtx ();
7950 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7951 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7952 label));
7953
7954 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7955 we used movdqa (i.e. TImode) instead? Perhaps even better would
7956 be if we could determine the real mode of the data, via a hook
7957 into pass_stdarg. Ignore all that for now. */
7958 smode = V4SFmode;
7959 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7960 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7961
7962 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7963 if (max > X86_64_SSE_REGPARM_MAX)
7964 max = X86_64_SSE_REGPARM_MAX;
7965
7966 for (i = cum->sse_regno; i < max; ++i)
7967 {
7968 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7969 mem = gen_rtx_MEM (smode, mem);
7970 MEM_NOTRAP_P (mem) = 1;
7971 set_mem_alias_set (mem, set);
7972 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7973
7974 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7975 }
7976
7977 emit_label (label);
7978 }
7979 }
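
/* Sketch of the register save area laid out above: up to six GP
   registers (%rdi, %rsi, %rdx, %rcx, %r8, %r9) are stored in 8-byte
   slots at offsets 0..40, followed by up to eight SSE registers in
   16-byte slots starting at ix86_varargs_gpr_size; the SSE stores are
   jumped over at run time when %al is zero, i.e. when the caller
   passed no vector arguments.  */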
7980
7981 static void
7982 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7983 {
7984 alias_set_type set = get_varargs_alias_set ();
7985 int i;
7986
7987 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7988 {
7989 rtx reg, mem;
7990
7991 mem = gen_rtx_MEM (Pmode,
7992 plus_constant (virtual_incoming_args_rtx,
7993 i * UNITS_PER_WORD));
7994 MEM_NOTRAP_P (mem) = 1;
7995 set_mem_alias_set (mem, set);
7996
7997 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7998 emit_move_insn (mem, reg);
7999 }
8000 }
8001
8002 static void
8003 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8004 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8005 int no_rtl)
8006 {
8007 CUMULATIVE_ARGS next_cum;
8008 tree fntype;
8009
8010 /* This argument doesn't appear to be used anymore, which is good,
8011 because the old code here didn't suppress rtl generation. */
8012 gcc_assert (!no_rtl);
8013
8014 if (!TARGET_64BIT)
8015 return;
8016
8017 fntype = TREE_TYPE (current_function_decl);
8018
8019 /* For varargs, we do not want to skip the dummy va_dcl argument.
8020 For stdargs, we do want to skip the last named argument. */
8021 next_cum = *cum;
8022 if (stdarg_p (fntype))
8023 ix86_function_arg_advance (&next_cum, mode, type, true);
8024
8025 if (cum->call_abi == MS_ABI)
8026 setup_incoming_varargs_ms_64 (&next_cum);
8027 else
8028 setup_incoming_varargs_64 (&next_cum);
8029 }
8030
8031 /* Return true if TYPE is a va_list represented as a plain char pointer. */
8032
8033 static bool
8034 is_va_list_char_pointer (tree type)
8035 {
8036 tree canonic;
8037
8038 /* For 32-bit it is always true. */
8039 if (!TARGET_64BIT)
8040 return true;
8041 canonic = ix86_canonical_va_list_type (type);
8042 return (canonic == ms_va_list_type_node
8043 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8044 }
8045
8046 /* Implement va_start. */
8047
8048 static void
8049 ix86_va_start (tree valist, rtx nextarg)
8050 {
8051 HOST_WIDE_INT words, n_gpr, n_fpr;
8052 tree f_gpr, f_fpr, f_ovf, f_sav;
8053 tree gpr, fpr, ovf, sav, t;
8054 tree type;
8055 rtx ovf_rtx;
8056
8057 if (flag_split_stack
8058 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8059 {
8060 unsigned int scratch_regno;
8061
8062 /* When we are splitting the stack, we can't refer to the stack
8063 arguments using internal_arg_pointer, because they may be on
8064 the old stack. The split stack prologue will arrange to
8065 leave a pointer to the old stack arguments in a scratch
8066 register, which we here copy to a pseudo-register. The split
8067 stack prologue can't set the pseudo-register directly because
8068 it (the prologue) runs before any registers have been saved. */
8069
8070 scratch_regno = split_stack_prologue_scratch_regno ();
8071 if (scratch_regno != INVALID_REGNUM)
8072 {
8073 rtx reg, seq;
8074
8075 reg = gen_reg_rtx (Pmode);
8076 cfun->machine->split_stack_varargs_pointer = reg;
8077
8078 start_sequence ();
8079 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8080 seq = get_insns ();
8081 end_sequence ();
8082
8083 push_topmost_sequence ();
8084 emit_insn_after (seq, entry_of_function ());
8085 pop_topmost_sequence ();
8086 }
8087 }
8088
8089 /* Only 64bit target needs something special. */
8090 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8091 {
8092 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8093 std_expand_builtin_va_start (valist, nextarg);
8094 else
8095 {
8096 rtx va_r, next;
8097
8098 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8099 next = expand_binop (ptr_mode, add_optab,
8100 cfun->machine->split_stack_varargs_pointer,
8101 crtl->args.arg_offset_rtx,
8102 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8103 convert_move (va_r, next, 0);
8104 }
8105 return;
8106 }
8107
8108 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8109 f_fpr = DECL_CHAIN (f_gpr);
8110 f_ovf = DECL_CHAIN (f_fpr);
8111 f_sav = DECL_CHAIN (f_ovf);
8112
8113 valist = build_simple_mem_ref (valist);
8114 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8115 /* The following should be folded into the MEM_REF offset. */
8116 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8117 f_gpr, NULL_TREE);
8118 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8119 f_fpr, NULL_TREE);
8120 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8121 f_ovf, NULL_TREE);
8122 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8123 f_sav, NULL_TREE);
8124
8125 /* Count number of gp and fp argument registers used. */
8126 words = crtl->args.info.words;
8127 n_gpr = crtl->args.info.regno;
8128 n_fpr = crtl->args.info.sse_regno;
8129
8130 if (cfun->va_list_gpr_size)
8131 {
8132 type = TREE_TYPE (gpr);
8133 t = build2 (MODIFY_EXPR, type,
8134 gpr, build_int_cst (type, n_gpr * 8));
8135 TREE_SIDE_EFFECTS (t) = 1;
8136 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8137 }
8138
8139 if (TARGET_SSE && cfun->va_list_fpr_size)
8140 {
8141 type = TREE_TYPE (fpr);
8142 t = build2 (MODIFY_EXPR, type, fpr,
8143 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8144 TREE_SIDE_EFFECTS (t) = 1;
8145 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8146 }
8147
8148 /* Find the overflow area. */
8149 type = TREE_TYPE (ovf);
8150 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8151 ovf_rtx = crtl->args.internal_arg_pointer;
8152 else
8153 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8154 t = make_tree (type, ovf_rtx);
8155 if (words != 0)
8156 t = build2 (POINTER_PLUS_EXPR, type, t,
8157 size_int (words * UNITS_PER_WORD));
8158 t = build2 (MODIFY_EXPR, type, ovf, t);
8159 TREE_SIDE_EFFECTS (t) = 1;
8160 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8161
8162 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8163 {
8164 /* Find the register save area.
8165 The function prologue saves it right above the stack frame. */
8166 type = TREE_TYPE (sav);
8167 t = make_tree (type, frame_pointer_rtx);
8168 if (!ix86_varargs_gpr_size)
8169 t = build2 (POINTER_PLUS_EXPR, type, t,
8170 size_int (-8 * X86_64_REGPARM_MAX));
8171 t = build2 (MODIFY_EXPR, type, sav, t);
8172 TREE_SIDE_EFFECTS (t) = 1;
8173 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8174 }
8175 }
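
/* Example of the initialization above: for void f (int n, ...) the one
   named integer argument consumes a GP register, so gp_offset starts at
   8 and fp_offset at 8 * X86_64_REGPARM_MAX (48, no named FP arguments);
   overflow_arg_area points at the first stack-passed argument and
   reg_save_area at the block saved by the prologue.  */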
8176
8177 /* Implement va_arg. */
8178
8179 static tree
8180 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8181 gimple_seq *post_p)
8182 {
8183 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8184 tree f_gpr, f_fpr, f_ovf, f_sav;
8185 tree gpr, fpr, ovf, sav, t;
8186 int size, rsize;
8187 tree lab_false, lab_over = NULL_TREE;
8188 tree addr, t2;
8189 rtx container;
8190 int indirect_p = 0;
8191 tree ptrtype;
8192 enum machine_mode nat_mode;
8193 unsigned int arg_boundary;
8194
8195 /* Only 64bit target needs something special. */
8196 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8197 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8198
8199 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8200 f_fpr = DECL_CHAIN (f_gpr);
8201 f_ovf = DECL_CHAIN (f_fpr);
8202 f_sav = DECL_CHAIN (f_ovf);
8203
8204 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8205 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8206 valist = build_va_arg_indirect_ref (valist);
8207 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8208 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8209 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8210
8211 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8212 if (indirect_p)
8213 type = build_pointer_type (type);
8214 size = int_size_in_bytes (type);
8215 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8216
8217 nat_mode = type_natural_mode (type, NULL);
8218 switch (nat_mode)
8219 {
8220 case V8SFmode:
8221 case V8SImode:
8222 case V32QImode:
8223 case V16HImode:
8224 case V4DFmode:
8225 case V4DImode:
8226 /* Unnamed 256bit vector mode parameters are passed on stack. */
8227 if (!TARGET_64BIT_MS_ABI)
8228 {
8229 container = NULL;
8230 break;
8231 }
8232
8233 default:
8234 container = construct_container (nat_mode, TYPE_MODE (type),
8235 type, 0, X86_64_REGPARM_MAX,
8236 X86_64_SSE_REGPARM_MAX, intreg,
8237 0);
8238 break;
8239 }
8240
8241 /* Pull the value out of the saved registers. */
8242
8243 addr = create_tmp_var (ptr_type_node, "addr");
8244
8245 if (container)
8246 {
8247 int needed_intregs, needed_sseregs;
8248 bool need_temp;
8249 tree int_addr, sse_addr;
8250
8251 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8252 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8253
8254 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8255
8256 need_temp = (!REG_P (container)
8257 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8258 || TYPE_ALIGN (type) > 128));
8259
8260 /* If we are passing a structure, verify that it occupies a consecutive
8261 block in the register save area. If not, we need to do moves. */
8262 if (!need_temp && !REG_P (container))
8263 {
8264 /* Verify that all registers are strictly consecutive. */
8265 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8266 {
8267 int i;
8268
8269 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8270 {
8271 rtx slot = XVECEXP (container, 0, i);
8272 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8273 || INTVAL (XEXP (slot, 1)) != i * 16)
8274 need_temp = 1;
8275 }
8276 }
8277 else
8278 {
8279 int i;
8280
8281 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8282 {
8283 rtx slot = XVECEXP (container, 0, i);
8284 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8285 || INTVAL (XEXP (slot, 1)) != i * 8)
8286 need_temp = 1;
8287 }
8288 }
8289 }
8290 if (!need_temp)
8291 {
8292 int_addr = addr;
8293 sse_addr = addr;
8294 }
8295 else
8296 {
8297 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8298 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8299 }
8300
8301 /* First ensure that we fit completely in registers. */
8302 if (needed_intregs)
8303 {
8304 t = build_int_cst (TREE_TYPE (gpr),
8305 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8306 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8307 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8308 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8309 gimplify_and_add (t, pre_p);
8310 }
8311 if (needed_sseregs)
8312 {
8313 t = build_int_cst (TREE_TYPE (fpr),
8314 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8315 + X86_64_REGPARM_MAX * 8);
8316 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8317 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8318 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8319 gimplify_and_add (t, pre_p);
8320 }
8321
8322 /* Compute index to start of area used for integer regs. */
8323 if (needed_intregs)
8324 {
8325 /* int_addr = gpr + sav; */
8326 t = fold_convert (sizetype, gpr);
8327 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8328 gimplify_assign (int_addr, t, pre_p);
8329 }
8330 if (needed_sseregs)
8331 {
8332 /* sse_addr = fpr + sav; */
8333 t = fold_convert (sizetype, fpr);
8334 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8335 gimplify_assign (sse_addr, t, pre_p);
8336 }
8337 if (need_temp)
8338 {
8339 int i, prev_size = 0;
8340 tree temp = create_tmp_var (type, "va_arg_tmp");
8341
8342 /* addr = &temp; */
8343 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8344 gimplify_assign (addr, t, pre_p);
8345
8346 for (i = 0; i < XVECLEN (container, 0); i++)
8347 {
8348 rtx slot = XVECEXP (container, 0, i);
8349 rtx reg = XEXP (slot, 0);
8350 enum machine_mode mode = GET_MODE (reg);
8351 tree piece_type;
8352 tree addr_type;
8353 tree daddr_type;
8354 tree src_addr, src;
8355 int src_offset;
8356 tree dest_addr, dest;
8357 int cur_size = GET_MODE_SIZE (mode);
8358
8359 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8360 prev_size = INTVAL (XEXP (slot, 1));
8361 if (prev_size + cur_size > size)
8362 {
8363 cur_size = size - prev_size;
8364 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8365 if (mode == BLKmode)
8366 mode = QImode;
8367 }
8368 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8369 if (mode == GET_MODE (reg))
8370 addr_type = build_pointer_type (piece_type);
8371 else
8372 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8373 true);
8374 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8375 true);
8376
8377 if (SSE_REGNO_P (REGNO (reg)))
8378 {
8379 src_addr = sse_addr;
8380 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8381 }
8382 else
8383 {
8384 src_addr = int_addr;
8385 src_offset = REGNO (reg) * 8;
8386 }
8387 src_addr = fold_convert (addr_type, src_addr);
8388 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
8389 size_int (src_offset));
8390
8391 dest_addr = fold_convert (daddr_type, addr);
8392 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
8393 size_int (prev_size));
8394 if (cur_size == GET_MODE_SIZE (mode))
8395 {
8396 src = build_va_arg_indirect_ref (src_addr);
8397 dest = build_va_arg_indirect_ref (dest_addr);
8398
8399 gimplify_assign (dest, src, pre_p);
8400 }
8401 else
8402 {
8403 tree copy
8404 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8405 3, dest_addr, src_addr,
8406 size_int (cur_size));
8407 gimplify_and_add (copy, pre_p);
8408 }
8409 prev_size += cur_size;
8410 }
8411 }
8412
8413 if (needed_intregs)
8414 {
8415 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8416 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8417 gimplify_assign (gpr, t, pre_p);
8418 }
8419
8420 if (needed_sseregs)
8421 {
8422 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8423 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8424 gimplify_assign (fpr, t, pre_p);
8425 }
8426
8427 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8428
8429 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8430 }
8431
8432 /* ... otherwise out of the overflow area. */
8433
8434 /* When the caller aligns a parameter on the stack, an alignment beyond
8435 MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
8436 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
8437 caller. */
8438 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8439 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8440 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8441
8442 /* Care for on-stack alignment if needed. */
8443 if (arg_boundary <= 64 || size == 0)
8444 t = ovf;
8445 else
8446 {
8447 HOST_WIDE_INT align = arg_boundary / 8;
8448 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
8449 size_int (align - 1));
8450 t = fold_convert (sizetype, t);
8451 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8452 size_int (-align));
8453 t = fold_convert (TREE_TYPE (ovf), t);
8454 }
8455
8456 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8457 gimplify_assign (addr, t, pre_p);
8458
8459 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
8460 size_int (rsize * UNITS_PER_WORD));
8461 gimplify_assign (unshare_expr (ovf), t, pre_p);
8462
8463 if (container)
8464 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8465
8466 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8467 addr = fold_convert (ptrtype, addr);
8468
8469 if (indirect_p)
8470 addr = build_va_arg_indirect_ref (addr);
8471 return build_va_arg_indirect_ref (addr);
8472 }
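
/* Roughly, for a plain int the sequence gimplified above behaves like

       if (gpr >= 6 * 8)
         goto overflow;
       addr = sav + gpr;
       gpr += 8;
       goto done;
     overflow:
       addr = ovf;               /- aligned first if the type needs it -/
       ovf += rsize * 8;
     done:
       result = *(int *) addr;

   with the SSE counterpart using fpr, 16-byte slots and the 176-byte
   save-area bound instead.  */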
8473 \f
8474 /* Return true if OPNUM's MEM should be matched
8475 in movabs* patterns. */
8476
8477 bool
8478 ix86_check_movabs (rtx insn, int opnum)
8479 {
8480 rtx set, mem;
8481
8482 set = PATTERN (insn);
8483 if (GET_CODE (set) == PARALLEL)
8484 set = XVECEXP (set, 0, 0);
8485 gcc_assert (GET_CODE (set) == SET);
8486 mem = XEXP (set, opnum);
8487 while (GET_CODE (mem) == SUBREG)
8488 mem = SUBREG_REG (mem);
8489 gcc_assert (MEM_P (mem));
8490 return volatile_ok || !MEM_VOLATILE_P (mem);
8491 }
8492 \f
8493 /* Initialize the table of extra 80387 mathematical constants. */
8494
8495 static void
8496 init_ext_80387_constants (void)
8497 {
8498 static const char * cst[5] =
8499 {
8500 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8501 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8502 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8503 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8504 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8505 };
8506 int i;
8507
8508 for (i = 0; i < 5; i++)
8509 {
8510 real_from_string (&ext_80387_constants_table[i], cst[i]);
8511 /* Ensure each constant is rounded to XFmode precision. */
8512 real_convert (&ext_80387_constants_table[i],
8513 XFmode, &ext_80387_constants_table[i]);
8514 }
8515
8516 ext_80387_constants_init = 1;
8517 }
8518
8519 /* Return non-zero if the constant is something that
8520 can be loaded with a special instruction. */
8521
8522 int
8523 standard_80387_constant_p (rtx x)
8524 {
8525 enum machine_mode mode = GET_MODE (x);
8526
8527 REAL_VALUE_TYPE r;
8528
8529 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8530 return -1;
8531
8532 if (x == CONST0_RTX (mode))
8533 return 1;
8534 if (x == CONST1_RTX (mode))
8535 return 2;
8536
8537 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8538
8539 /* For XFmode constants, try to find a special 80387 instruction when
8540 optimizing for size or on those CPUs that benefit from them. */
8541 if (mode == XFmode
8542 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8543 {
8544 int i;
8545
8546 if (! ext_80387_constants_init)
8547 init_ext_80387_constants ();
8548
8549 for (i = 0; i < 5; i++)
8550 if (real_identical (&r, &ext_80387_constants_table[i]))
8551 return i + 3;
8552 }
8553
8554 /* A load of the constant -0.0 or -1.0 will be split into an
8555 fldz;fchs or fld1;fchs sequence. */
8556 if (real_isnegzero (&r))
8557 return 8;
8558 if (real_identical (&r, &dconstm1))
8559 return 9;
8560
8561 return 0;
8562 }
8563
8564 /* Return the opcode of the special instruction to be used to load
8565 the constant X. */
8566
8567 const char *
8568 standard_80387_constant_opcode (rtx x)
8569 {
8570 switch (standard_80387_constant_p (x))
8571 {
8572 case 1:
8573 return "fldz";
8574 case 2:
8575 return "fld1";
8576 case 3:
8577 return "fldlg2";
8578 case 4:
8579 return "fldln2";
8580 case 5:
8581 return "fldl2e";
8582 case 6:
8583 return "fldl2t";
8584 case 7:
8585 return "fldpi";
8586 case 8:
8587 case 9:
8588 return "#";
8589 default:
8590 gcc_unreachable ();
8591 }
8592 }
8593
8594 /* Return the CONST_DOUBLE representing the 80387 constant that is
8595 loaded by the specified special instruction. The argument IDX
8596 matches the return value from standard_80387_constant_p. */
8597
8598 rtx
8599 standard_80387_constant_rtx (int idx)
8600 {
8601 int i;
8602
8603 if (! ext_80387_constants_init)
8604 init_ext_80387_constants ();
8605
8606 switch (idx)
8607 {
8608 case 3:
8609 case 4:
8610 case 5:
8611 case 6:
8612 case 7:
8613 i = idx - 3;
8614 break;
8615
8616 default:
8617 gcc_unreachable ();
8618 }
8619
8620 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8621 XFmode);
8622 }
8623
8624 /* Return 1 if X is all 0s and 2 if X is all 1s
8625 in a supported SSE vector mode. */
8626
8627 int
8628 standard_sse_constant_p (rtx x)
8629 {
8630 enum machine_mode mode = GET_MODE (x);
8631
8632 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8633 return 1;
8634 if (vector_all_ones_operand (x, mode))
8635 switch (mode)
8636 {
8637 case V16QImode:
8638 case V8HImode:
8639 case V4SImode:
8640 case V2DImode:
8641 if (TARGET_SSE2)
8642 return 2;
8643 default:
8644 break;
8645 }
8646
8647 return 0;
8648 }
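
/* Example: CONST0_RTX (V4SFmode) yields 1 (loadable with a register
   xor), while a vector_all_ones_operand in V4SImode yields 2 when SSE2
   is available (loadable with pcmpeqd); anything else returns 0.  */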
8649
8650 /* Return the opcode of the special instruction to be used to load
8651 the constant X. */
8652
8653 const char *
8654 standard_sse_constant_opcode (rtx insn, rtx x)
8655 {
8656 switch (standard_sse_constant_p (x))
8657 {
8658 case 1:
8659 switch (get_attr_mode (insn))
8660 {
8661 case MODE_V4SF:
8662 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8663 case MODE_V2DF:
8664 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8665 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8666 else
8667 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
8668 case MODE_TI:
8669 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8670 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8671 else
8672 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
8673 case MODE_V8SF:
8674 return "vxorps\t%x0, %x0, %x0";
8675 case MODE_V4DF:
8676 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8677 return "vxorps\t%x0, %x0, %x0";
8678 else
8679 return "vxorpd\t%x0, %x0, %x0";
8680 case MODE_OI:
8681 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8682 return "vxorps\t%x0, %x0, %x0";
8683 else
8684 return "vpxor\t%x0, %x0, %x0";
8685 default:
8686 break;
8687 }
8688 case 2:
8689 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
8690 default:
8691 break;
8692 }
8693 gcc_unreachable ();
8694 }
8695
8696 /* Returns true if OP contains a symbol reference. */
8697
8698 bool
8699 symbolic_reference_mentioned_p (rtx op)
8700 {
8701 const char *fmt;
8702 int i;
8703
8704 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8705 return true;
8706
8707 fmt = GET_RTX_FORMAT (GET_CODE (op));
8708 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8709 {
8710 if (fmt[i] == 'E')
8711 {
8712 int j;
8713
8714 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8715 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8716 return true;
8717 }
8718
8719 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8720 return true;
8721 }
8722
8723 return false;
8724 }
8725
8726 /* Return true if it is appropriate to emit `ret' instructions in the
8727 body of a function. Do this only if the epilogue is simple, needing a
8728 couple of insns. Prior to reloading, we can't tell how many registers
8729 must be saved, so return false then. Return false if there is no frame
8730 marker to de-allocate. */
8731
8732 bool
8733 ix86_can_use_return_insn_p (void)
8734 {
8735 struct ix86_frame frame;
8736
8737 if (! reload_completed || frame_pointer_needed)
8738 return false;
8739
8740 /* Don't allow more than 32k pop, since that's all we can do
8741 with one instruction. */
8742 if (crtl->args.pops_args && crtl->args.size >= 32768)
8743 return 0;
8744
8745 ix86_compute_frame_layout (&frame);
8746 return (frame.stack_pointer_offset == UNITS_PER_WORD
8747 && (frame.nregs + frame.nsseregs) == 0);
8748 }
8749 \f
8750 /* Value should be nonzero if functions must have frame pointers.
8751 Zero means the frame pointer need not be set up (and parms may
8752 be accessed via the stack pointer) in functions that seem suitable. */
8753
8754 static bool
8755 ix86_frame_pointer_required (void)
8756 {
8757 /* If we accessed previous frames, then the generated code expects
8758 to be able to access the saved ebp value in our frame. */
8759 if (cfun->machine->accesses_prev_frame)
8760 return true;
8761
8762 /* Several x86 OSes need a frame pointer for other reasons,
8763 usually pertaining to setjmp. */
8764 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8765 return true;
8766
8767 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8768 turns off the frame pointer by default. Turn it back on now if
8769 we've not got a leaf function. */
8770 if (TARGET_OMIT_LEAF_FRAME_POINTER
8771 && (!current_function_is_leaf
8772 || ix86_current_function_calls_tls_descriptor))
8773 return true;
8774
8775 if (crtl->profile && !flag_fentry)
8776 return true;
8777
8778 return false;
8779 }
8780
8781 /* Record that the current function accesses previous call frames. */
8782
8783 void
8784 ix86_setup_frame_addresses (void)
8785 {
8786 cfun->machine->accesses_prev_frame = 1;
8787 }
8788 \f
8789 #ifndef USE_HIDDEN_LINKONCE
8790 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8791 # define USE_HIDDEN_LINKONCE 1
8792 # else
8793 # define USE_HIDDEN_LINKONCE 0
8794 # endif
8795 #endif
8796
8797 static int pic_labels_used;
8798
8799 /* Fills in the label name that should be used for a pc thunk for
8800 the given register. */
8801
8802 static void
8803 get_pc_thunk_name (char name[32], unsigned int regno)
8804 {
8805 gcc_assert (!TARGET_64BIT);
8806
8807 if (USE_HIDDEN_LINKONCE)
8808 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8809 else
8810 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8811 }
8812
8813
8814 /* This function generates the pc thunks used by -fpic code; each thunk loads
8815    its register with the return address of the caller and then returns.  */
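/* As an illustrative sketch (assuming USE_HIDDEN_LINKONCE and the %ebx
   thunk), the emitted helper looks roughly like:

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies the caller's return address (the address of the insn
   following the call) into the requested register.  */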
8816
8817 static void
8818 ix86_code_end (void)
8819 {
8820 rtx xops[2];
8821 int regno;
8822
8823 for (regno = AX_REG; regno <= SP_REG; regno++)
8824 {
8825 char name[32];
8826 tree decl;
8827
8828 if (!(pic_labels_used & (1 << regno)))
8829 continue;
8830
8831 get_pc_thunk_name (name, regno);
8832
8833 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8834 get_identifier (name),
8835 build_function_type (void_type_node, void_list_node));
8836 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8837 NULL_TREE, void_type_node);
8838 TREE_PUBLIC (decl) = 1;
8839 TREE_STATIC (decl) = 1;
8840
8841 #if TARGET_MACHO
8842 if (TARGET_MACHO)
8843 {
8844 switch_to_section (darwin_sections[text_coal_section]);
8845 fputs ("\t.weak_definition\t", asm_out_file);
8846 assemble_name (asm_out_file, name);
8847 fputs ("\n\t.private_extern\t", asm_out_file);
8848 assemble_name (asm_out_file, name);
8849 putc ('\n', asm_out_file);
8850 ASM_OUTPUT_LABEL (asm_out_file, name);
8851 DECL_WEAK (decl) = 1;
8852 }
8853 else
8854 #endif
8855 if (USE_HIDDEN_LINKONCE)
8856 {
8857 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8858
8859 targetm.asm_out.unique_section (decl, 0);
8860 switch_to_section (get_named_section (decl, NULL, 0));
8861
8862 targetm.asm_out.globalize_label (asm_out_file, name);
8863 fputs ("\t.hidden\t", asm_out_file);
8864 assemble_name (asm_out_file, name);
8865 putc ('\n', asm_out_file);
8866 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8867 }
8868 else
8869 {
8870 switch_to_section (text_section);
8871 ASM_OUTPUT_LABEL (asm_out_file, name);
8872 }
8873
8874 DECL_INITIAL (decl) = make_node (BLOCK);
8875 current_function_decl = decl;
8876 init_function_start (decl);
8877 first_function_block_is_cold = false;
8878 /* Make sure unwind info is emitted for the thunk if needed. */
8879 final_start_function (emit_barrier (), asm_out_file, 1);
8880
8881 /* Pad stack IP move with 4 instructions (two NOPs count
8882 as one instruction). */
8883 if (TARGET_PAD_SHORT_FUNCTION)
8884 {
8885 int i = 8;
8886
8887 while (i--)
8888 fputs ("\tnop\n", asm_out_file);
8889 }
8890
8891 xops[0] = gen_rtx_REG (Pmode, regno);
8892 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8893 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8894 fputs ("\tret\n", asm_out_file);
8895 final_end_function ();
8896 init_insn_lengths ();
8897 free_after_compilation (cfun);
8898 set_cfun (NULL);
8899 current_function_decl = NULL;
8900 }
8901
8902 if (flag_split_stack)
8903 file_end_indicate_split_stack ();
8904 }
8905
8906 /* Emit code for the SET_GOT patterns. */
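/* A rough sketch of the 32-bit output (assuming %ebx as DEST and the
   default GOT_SYMBOL_NAME of "_GLOBAL_OFFSET_TABLE_"):

   without deep branch prediction:
	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   with deep branch prediction, the call/pop pair is replaced by a call to
   the pc thunk emitted in ix86_code_end:
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx  */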
8907
8908 const char *
8909 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8910 {
8911 rtx xops[3];
8912
8913 xops[0] = dest;
8914
8915 if (TARGET_VXWORKS_RTP && flag_pic)
8916 {
8917 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8918 xops[2] = gen_rtx_MEM (Pmode,
8919 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8920 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8921
8922 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8923 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8924 an unadorned address. */
8925 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8926 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8927 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8928 return "";
8929 }
8930
8931 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8932
8933 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8934 {
8935 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8936
8937 if (!flag_pic)
8938 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8939 else
8940 {
8941 output_asm_insn ("call\t%a2", xops);
8942 #ifdef DWARF2_UNWIND_INFO
8943 /* The call to next label acts as a push. */
8944 if (dwarf2out_do_frame ())
8945 {
8946 rtx insn;
8947 start_sequence ();
8948 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8949 gen_rtx_PLUS (Pmode,
8950 stack_pointer_rtx,
8951 GEN_INT (-4))));
8952 RTX_FRAME_RELATED_P (insn) = 1;
8953 dwarf2out_frame_debug (insn, true);
8954 end_sequence ();
8955 }
8956 #endif
8957 }
8958
8959 #if TARGET_MACHO
8960 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8961 is what will be referenced by the Mach-O PIC subsystem. */
8962 if (!label)
8963 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8964 #endif
8965
8966 targetm.asm_out.internal_label (asm_out_file, "L",
8967 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8968
8969 if (flag_pic)
8970 {
8971 output_asm_insn ("pop%z0\t%0", xops);
8972 #ifdef DWARF2_UNWIND_INFO
8973 /* The pop is a pop and clobbers dest, but doesn't restore it
8974 for unwind info purposes. */
8975 if (dwarf2out_do_frame ())
8976 {
8977 rtx insn;
8978 start_sequence ();
8979 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8980 dwarf2out_frame_debug (insn, true);
8981 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8982 gen_rtx_PLUS (Pmode,
8983 stack_pointer_rtx,
8984 GEN_INT (4))));
8985 RTX_FRAME_RELATED_P (insn) = 1;
8986 dwarf2out_frame_debug (insn, true);
8987 end_sequence ();
8988 }
8989 #endif
8990 }
8991 }
8992 else
8993 {
8994 char name[32];
8995 get_pc_thunk_name (name, REGNO (dest));
8996 pic_labels_used |= 1 << REGNO (dest);
8997
8998 #ifdef DWARF2_UNWIND_INFO
8999 /* Ensure all queued register saves are flushed before the
9000 call. */
9001 if (dwarf2out_do_frame ())
9002 dwarf2out_flush_queued_reg_saves ();
9003 #endif
9004 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9005 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9006 output_asm_insn ("call\t%X2", xops);
9007 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
9008 is what will be referenced by the Mach-O PIC subsystem. */
9009 #if TARGET_MACHO
9010 if (!label)
9011 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9012 else
9013 targetm.asm_out.internal_label (asm_out_file, "L",
9014 CODE_LABEL_NUMBER (label));
9015 #endif
9016 }
9017
9018 if (TARGET_MACHO)
9019 return "";
9020
9021 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
9022 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9023 else
9024 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
9025
9026 return "";
9027 }
9028
9029 /* Generate an "push" pattern for input ARG. */
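/* For a 32-bit push of %ebp, for example, the returned pattern is roughly
   (a sketch):

	(set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI bp))

   Note that this only builds the SET rtx and updates the frame-state
   bookkeeping; the caller is responsible for emitting it.  */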
9030
9031 static rtx
9032 gen_push (rtx arg)
9033 {
9034 struct machine_function *m = cfun->machine;
9035
9036 if (m->fs.cfa_reg == stack_pointer_rtx)
9037 m->fs.cfa_offset += UNITS_PER_WORD;
9038 m->fs.sp_offset += UNITS_PER_WORD;
9039
9040 return gen_rtx_SET (VOIDmode,
9041 gen_rtx_MEM (Pmode,
9042 gen_rtx_PRE_DEC (Pmode,
9043 stack_pointer_rtx)),
9044 arg);
9045 }
9046
9047 /* Generate an "pop" pattern for input ARG. */
9048
9049 static rtx
9050 gen_pop (rtx arg)
9051 {
9052 return gen_rtx_SET (VOIDmode,
9053 arg,
9054 gen_rtx_MEM (Pmode,
9055 gen_rtx_POST_INC (Pmode,
9056 stack_pointer_rtx)));
9057 }
9058
9059 /* Return the number of an unused call-clobbered register if one is available
9060    for the entire function, or INVALID_REGNUM otherwise.  */
9061
9062 static unsigned int
9063 ix86_select_alt_pic_regnum (void)
9064 {
9065 if (current_function_is_leaf
9066 && !crtl->profile
9067 && !ix86_current_function_calls_tls_descriptor)
9068 {
9069 int i, drap;
9070 /* Can't use the same register for both PIC and DRAP. */
9071 if (crtl->drap_reg)
9072 drap = REGNO (crtl->drap_reg);
9073 else
9074 drap = -1;
9075 for (i = 2; i >= 0; --i)
9076 if (i != drap && !df_regs_ever_live_p (i))
9077 return i;
9078 }
9079
9080 return INVALID_REGNUM;
9081 }
9082
9083 /* Return 1 if we need to save REGNO. */
9084 static int
9085 ix86_save_reg (unsigned int regno, int maybe_eh_return)
9086 {
9087 if (pic_offset_table_rtx
9088 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9089 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9090 || crtl->profile
9091 || crtl->calls_eh_return
9092 || crtl->uses_const_pool))
9093 {
9094 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
9095 return 0;
9096 return 1;
9097 }
9098
9099 if (crtl->calls_eh_return && maybe_eh_return)
9100 {
9101 unsigned i;
9102 for (i = 0; ; i++)
9103 {
9104 unsigned test = EH_RETURN_DATA_REGNO (i);
9105 if (test == INVALID_REGNUM)
9106 break;
9107 if (test == regno)
9108 return 1;
9109 }
9110 }
9111
9112 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9113 return 1;
9114
9115 return (df_regs_ever_live_p (regno)
9116 && !call_used_regs[regno]
9117 && !fixed_regs[regno]
9118 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9119 }
9120
9121 /* Return the number of saved general purpose registers.  */
9122
9123 static int
9124 ix86_nsaved_regs (void)
9125 {
9126 int nregs = 0;
9127 int regno;
9128
9129 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9130 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9131 nregs ++;
9132 return nregs;
9133 }
9134
9135 /* Return the number of saved SSE registers.  */
9136
9137 static int
9138 ix86_nsaved_sseregs (void)
9139 {
9140 int nregs = 0;
9141 int regno;
9142
9143 if (!TARGET_64BIT_MS_ABI)
9144 return 0;
9145 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9146 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9147 nregs ++;
9148 return nregs;
9149 }
9150
9151 /* Given FROM and TO register numbers, say whether this elimination is
9152 allowed. If stack alignment is needed, we can only replace argument
9153 pointer with hard frame pointer, or replace frame pointer with stack
9154 pointer. Otherwise, frame pointer elimination is automatically
9155 handled and all other eliminations are valid. */
9156
9157 static bool
9158 ix86_can_eliminate (const int from, const int to)
9159 {
9160 if (stack_realign_fp)
9161 return ((from == ARG_POINTER_REGNUM
9162 && to == HARD_FRAME_POINTER_REGNUM)
9163 || (from == FRAME_POINTER_REGNUM
9164 && to == STACK_POINTER_REGNUM));
9165 else
9166 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9167 }
9168
9169 /* Return the offset between two registers, one to be eliminated, and the other
9170 its replacement, at the start of a routine. */
9171
9172 HOST_WIDE_INT
9173 ix86_initial_elimination_offset (int from, int to)
9174 {
9175 struct ix86_frame frame;
9176 ix86_compute_frame_layout (&frame);
9177
9178 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9179 return frame.hard_frame_pointer_offset;
9180 else if (from == FRAME_POINTER_REGNUM
9181 && to == HARD_FRAME_POINTER_REGNUM)
9182 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9183 else
9184 {
9185 gcc_assert (to == STACK_POINTER_REGNUM);
9186
9187 if (from == ARG_POINTER_REGNUM)
9188 return frame.stack_pointer_offset;
9189
9190 gcc_assert (from == FRAME_POINTER_REGNUM);
9191 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9192 }
9193 }
9194
9195 /* In a dynamically-aligned function, we can't know the offset from
9196 stack pointer to frame pointer, so we must ensure that setjmp
9197 eliminates fp against the hard fp (%ebp) rather than trying to
9198 index from %esp up to the top of the frame across a gap that is
9199 of unknown (at compile-time) size. */
9200 static rtx
9201 ix86_builtin_setjmp_frame_value (void)
9202 {
9203 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9204 }
9205
9206 /* On the x86 -fsplit-stack and -fstack-protector both use the same
9207 field in the TCB, so they can not be used together. */
9208
9209 static bool
9210 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
9211 struct gcc_options *opts ATTRIBUTE_UNUSED)
9212 {
9213 bool ret = true;
9214
9215 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
9216 if (report)
9217 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
9218 ret = false;
9219 #else
9220 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
9221 {
9222 if (report)
9223 error ("%<-fsplit-stack%> requires "
9224 "assembler support for CFI directives");
9225 ret = false;
9226 }
9227 #endif
9228
9229 return ret;
9230 }
9231
9232 /* When using -fsplit-stack, the allocation routines set a field in
9233 the TCB to the bottom of the stack plus this much space, measured
9234 in bytes. */
9235
9236 #define SPLIT_STACK_AVAILABLE 256
9237
9238 /* Fill structure ix86_frame with the layout of the current function's frame.  */
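/* A sketch of the layout being computed, from the CFA downwards (offsets
   grow towards the bottom of the stack):

	return address
	[pushed static chain]
	[saved %ebp / %rbp]		<- hard_frame_pointer_offset
	GP register save area		<- reg_save_offset
	[SSE register save area,	<- sse_reg_save_offset
	 16-byte aligned]
	va_arg register save area
	local variables			<- frame_pointer_offset
	[outgoing argument area]
	end of frame			<- stack_pointer_offset

   Bracketed items are only present when needed; the exact offsets are
   filled in by the code below.  */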
9239
9240 static void
9241 ix86_compute_frame_layout (struct ix86_frame *frame)
9242 {
9243 unsigned int stack_alignment_needed;
9244 HOST_WIDE_INT offset;
9245 unsigned int preferred_alignment;
9246 HOST_WIDE_INT size = get_frame_size ();
9247 HOST_WIDE_INT to_allocate;
9248
9249 frame->nregs = ix86_nsaved_regs ();
9250 frame->nsseregs = ix86_nsaved_sseregs ();
9251
9252 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9253 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9254
9255 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9256    for function prologues and leaf functions.  */
9257 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9258 && (!current_function_is_leaf || cfun->calls_alloca != 0
9259 || ix86_current_function_calls_tls_descriptor))
9260 {
9261 preferred_alignment = 16;
9262 stack_alignment_needed = 16;
9263 crtl->preferred_stack_boundary = 128;
9264 crtl->stack_alignment_needed = 128;
9265 }
9266
9267 gcc_assert (!size || stack_alignment_needed);
9268 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9269 gcc_assert (preferred_alignment <= stack_alignment_needed);
9270
9271 /* For SEH we have to limit the amount of code movement into the prologue.
9272 At present we do this via a BLOCKAGE, at which point there's very little
9273 scheduling that can be done, which means that there's very little point
9274 in doing anything except PUSHs. */
9275 if (TARGET_SEH)
9276 cfun->machine->use_fast_prologue_epilogue = false;
9277
9278 /* During reload iterations the number of registers saved can change.
9279    Recompute the value as needed.  Do not recompute when the number of registers
9280    didn't change, as reload does multiple calls to the function and does not
9281    expect the decision to change within a single iteration. */
9282 else if (!optimize_function_for_size_p (cfun)
9283 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9284 {
9285 int count = frame->nregs;
9286 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9287
9288 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9289
9290 /* The fast prologue uses move instead of push to save registers. This
9291 is significantly longer, but also executes faster as modern hardware
9292 can execute the moves in parallel, but can't do that for push/pop.
9293
9294    Be careful about choosing which prologue to emit: when the function takes
9295    many instructions to execute we may use the slow version, as well as when
9296    the function is known to be outside a hot spot (this is known only with
9297    feedback).  Weight the size of the function by the number of registers
9298    to save, as it is cheap to use one or two push instructions but very
9299    slow to use many of them. */
9300 if (count)
9301 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9302 if (node->frequency < NODE_FREQUENCY_NORMAL
9303 || (flag_branch_probabilities
9304 && node->frequency < NODE_FREQUENCY_HOT))
9305 cfun->machine->use_fast_prologue_epilogue = false;
9306 else
9307 cfun->machine->use_fast_prologue_epilogue
9308 = !expensive_function_p (count);
9309 }
9310 if (TARGET_PROLOGUE_USING_MOVE
9311 && cfun->machine->use_fast_prologue_epilogue)
9312 frame->save_regs_using_mov = true;
9313 else
9314 frame->save_regs_using_mov = false;
9315
9316 /* If static stack checking is enabled and done with probes, the registers
9317 need to be saved before allocating the frame. */
9318 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9319 frame->save_regs_using_mov = false;
9320
9321 /* Skip return address. */
9322 offset = UNITS_PER_WORD;
9323
9324 /* Skip pushed static chain. */
9325 if (ix86_static_chain_on_stack)
9326 offset += UNITS_PER_WORD;
9327
9328 /* Skip saved base pointer. */
9329 if (frame_pointer_needed)
9330 offset += UNITS_PER_WORD;
9331 frame->hfp_save_offset = offset;
9332
9333 /* The traditional frame pointer location is at the top of the frame. */
9334 frame->hard_frame_pointer_offset = offset;
9335
9336 /* Register save area */
9337 offset += frame->nregs * UNITS_PER_WORD;
9338 frame->reg_save_offset = offset;
9339
9340 /* Align and set SSE register save area. */
9341 if (frame->nsseregs)
9342 {
9343 /* The only ABI that has saved SSE registers (Win64) also has a
9344 16-byte aligned default stack, and thus we don't need to be
9345 within the re-aligned local stack frame to save them. */
9346 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9347 offset = (offset + 16 - 1) & -16;
9348 offset += frame->nsseregs * 16;
9349 }
9350 frame->sse_reg_save_offset = offset;
9351
9352 /* The re-aligned stack starts here. Values before this point are not
9353 directly comparable with values below this point. In order to make
9354 sure that no value happens to be the same before and after, force
9355 the alignment computation below to add a non-zero value. */
9356 if (stack_realign_fp)
9357 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9358
9359 /* Va-arg area */
9360 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9361 offset += frame->va_arg_size;
9362
9363 /* Align start of frame for local function. */
9364 if (stack_realign_fp
9365 || offset != frame->sse_reg_save_offset
9366 || size != 0
9367 || !current_function_is_leaf
9368 || cfun->calls_alloca
9369 || ix86_current_function_calls_tls_descriptor)
9370 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9371
9372 /* Frame pointer points here. */
9373 frame->frame_pointer_offset = offset;
9374
9375 offset += size;
9376
9377 /* Add outgoing arguments area. Can be skipped if we eliminated
9378 all the function calls as dead code.
9379 Skipping is however impossible when function calls alloca. Alloca
9380 expander assumes that last crtl->outgoing_args_size
9381 of stack frame are unused. */
9382 if (ACCUMULATE_OUTGOING_ARGS
9383 && (!current_function_is_leaf || cfun->calls_alloca
9384 || ix86_current_function_calls_tls_descriptor))
9385 {
9386 offset += crtl->outgoing_args_size;
9387 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9388 }
9389 else
9390 frame->outgoing_arguments_size = 0;
9391
9392 /* Align stack boundary. Only needed if we're calling another function
9393 or using alloca. */
9394 if (!current_function_is_leaf || cfun->calls_alloca
9395 || ix86_current_function_calls_tls_descriptor)
9396 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9397
9398 /* We've reached end of stack frame. */
9399 frame->stack_pointer_offset = offset;
9400
9401 /* Size prologue needs to allocate. */
9402 to_allocate = offset - frame->sse_reg_save_offset;
9403
9404 if ((!to_allocate && frame->nregs <= 1)
9405 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9406 frame->save_regs_using_mov = false;
9407
9408 if (ix86_using_red_zone ()
9409 && current_function_sp_is_unchanging
9410 && current_function_is_leaf
9411 && !ix86_current_function_calls_tls_descriptor)
9412 {
9413 frame->red_zone_size = to_allocate;
9414 if (frame->save_regs_using_mov)
9415 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9416 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9417 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9418 }
9419 else
9420 frame->red_zone_size = 0;
9421 frame->stack_pointer_offset -= frame->red_zone_size;
9422
9423 /* The SEH frame pointer location is near the bottom of the frame.
9424 This is enforced by the fact that the difference between the
9425 stack pointer and the frame pointer is limited to 240 bytes in
9426 the unwind data structure. */
9427 if (TARGET_SEH)
9428 {
9429 HOST_WIDE_INT diff;
9430
9431 /* If we can leave the frame pointer where it is, do so. */
9432 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9433 if (diff > 240 || (diff & 15) != 0)
9434 {
9435 /* Ideally we'd determine what portion of the local stack frame
9436 (within the constraint of the lowest 240) is most heavily used.
9437 But without that complication, simply bias the frame pointer
9438 by 128 bytes so as to maximize the amount of the local stack
9439 frame that is addressable with 8-bit offsets. */
9440 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9441 }
9442 }
9443 }
9444
9445 /* This is semi-inlined memory_address_length, but simplified
9446 since we know that we're always dealing with reg+offset, and
9447 to avoid having to create and discard all that rtl. */
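/* For example (a sketch): (%eax) needs no displacement byte, so the length
   is 0; 8(%ebp) needs a disp8, so the length is 1; 0(%ebp) also needs a
   disp8 because %ebp cannot be encoded without one; and 4096(%esp) needs a
   disp32 plus a SIB byte, for a length of 5.  */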
9448
9449 static inline int
9450 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9451 {
9452 int len = 4;
9453
9454 if (offset == 0)
9455 {
9456 /* EBP and R13 cannot be encoded without an offset. */
9457 len = (regno == BP_REG || regno == R13_REG);
9458 }
9459 else if (IN_RANGE (offset, -128, 127))
9460 len = 1;
9461
9462 /* ESP and R12 must be encoded with a SIB byte. */
9463 if (regno == SP_REG || regno == R12_REG)
9464 len++;
9465
9466 return len;
9467 }
9468
9469 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9470 The valid base registers are taken from CFUN->MACHINE->FS. */
9471
9472 static rtx
9473 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9474 {
9475 const struct machine_function *m = cfun->machine;
9476 rtx base_reg = NULL;
9477 HOST_WIDE_INT base_offset = 0;
9478
9479 if (m->use_fast_prologue_epilogue)
9480 {
9481 /* Choose the base register most likely to allow the most scheduling
9482 opportunities.  Generally FP is valid throughout the function,
9483 while DRAP must be reloaded within the epilogue. But choose either
9484 over the SP due to increased encoding size. */
9485
9486 if (m->fs.fp_valid)
9487 {
9488 base_reg = hard_frame_pointer_rtx;
9489 base_offset = m->fs.fp_offset - cfa_offset;
9490 }
9491 else if (m->fs.drap_valid)
9492 {
9493 base_reg = crtl->drap_reg;
9494 base_offset = 0 - cfa_offset;
9495 }
9496 else if (m->fs.sp_valid)
9497 {
9498 base_reg = stack_pointer_rtx;
9499 base_offset = m->fs.sp_offset - cfa_offset;
9500 }
9501 }
9502 else
9503 {
9504 HOST_WIDE_INT toffset;
9505 int len = 16, tlen;
9506
9507 /* Choose the base register with the smallest address encoding.
9508 With a tie, choose FP > DRAP > SP. */
9509 if (m->fs.sp_valid)
9510 {
9511 base_reg = stack_pointer_rtx;
9512 base_offset = m->fs.sp_offset - cfa_offset;
9513 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9514 }
9515 if (m->fs.drap_valid)
9516 {
9517 toffset = 0 - cfa_offset;
9518 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9519 if (tlen <= len)
9520 {
9521 base_reg = crtl->drap_reg;
9522 base_offset = toffset;
9523 len = tlen;
9524 }
9525 }
9526 if (m->fs.fp_valid)
9527 {
9528 toffset = m->fs.fp_offset - cfa_offset;
9529 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9530 if (tlen <= len)
9531 {
9532 base_reg = hard_frame_pointer_rtx;
9533 base_offset = toffset;
9534 len = tlen;
9535 }
9536 }
9537 }
9538 gcc_assert (base_reg != NULL);
9539
9540 return plus_constant (base_reg, base_offset);
9541 }
9542
9543 /* Emit code to save registers in the prologue. */
9544
9545 static void
9546 ix86_emit_save_regs (void)
9547 {
9548 unsigned int regno;
9549 rtx insn;
9550
9551 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9552 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9553 {
9554 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9555 RTX_FRAME_RELATED_P (insn) = 1;
9556 }
9557 }
9558
9559 /* Emit a single register save at CFA - CFA_OFFSET. */
9560
9561 static void
9562 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9563 HOST_WIDE_INT cfa_offset)
9564 {
9565 struct machine_function *m = cfun->machine;
9566 rtx reg = gen_rtx_REG (mode, regno);
9567 rtx mem, addr, base, insn;
9568
9569 addr = choose_baseaddr (cfa_offset);
9570 mem = gen_frame_mem (mode, addr);
9571
9572 /* For SSE saves, we need to indicate the 128-bit alignment. */
9573 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9574
9575 insn = emit_move_insn (mem, reg);
9576 RTX_FRAME_RELATED_P (insn) = 1;
9577
9578 base = addr;
9579 if (GET_CODE (base) == PLUS)
9580 base = XEXP (base, 0);
9581 gcc_checking_assert (REG_P (base));
9582
9583 /* When saving registers into a re-aligned local stack frame, avoid
9584 any tricky guessing by dwarf2out. */
9585 if (m->fs.realigned)
9586 {
9587 gcc_checking_assert (stack_realign_drap);
9588
9589 if (regno == REGNO (crtl->drap_reg))
9590 {
9591 /* A bit of a hack. We force the DRAP register to be saved in
9592 the re-aligned stack frame, which provides us with a copy
9593 of the CFA that will last past the prologue. Install it. */
9594 gcc_checking_assert (cfun->machine->fs.fp_valid);
9595 addr = plus_constant (hard_frame_pointer_rtx,
9596 cfun->machine->fs.fp_offset - cfa_offset);
9597 mem = gen_rtx_MEM (mode, addr);
9598 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9599 }
9600 else
9601 {
9602 /* The frame pointer is a stable reference within the
9603 aligned frame. Use it. */
9604 gcc_checking_assert (cfun->machine->fs.fp_valid);
9605 addr = plus_constant (hard_frame_pointer_rtx,
9606 cfun->machine->fs.fp_offset - cfa_offset);
9607 mem = gen_rtx_MEM (mode, addr);
9608 add_reg_note (insn, REG_CFA_EXPRESSION,
9609 gen_rtx_SET (VOIDmode, mem, reg));
9610 }
9611 }
9612
9613 /* The memory may not be relative to the current CFA register,
9614 which means that we may need to generate a new pattern for
9615 use by the unwind info. */
9616 else if (base != m->fs.cfa_reg)
9617 {
9618 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9619 mem = gen_rtx_MEM (mode, addr);
9620 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9621 }
9622 }
9623
9624 /* Emit code to save registers using MOV insns.
9625 First register is stored at CFA - CFA_OFFSET. */
9626 static void
9627 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9628 {
9629 unsigned int regno;
9630
9631 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9632 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9633 {
9634 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9635 cfa_offset -= UNITS_PER_WORD;
9636 }
9637 }
9638
9639 /* Emit code to save SSE registers using MOV insns.
9640 First register is stored at CFA - CFA_OFFSET. */
9641 static void
9642 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9643 {
9644 unsigned int regno;
9645
9646 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9647 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9648 {
9649 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9650 cfa_offset -= 16;
9651 }
9652 }
9653
9654 static GTY(()) rtx queued_cfa_restores;
9655
9656 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9657    manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
9658    Don't add the note if the previously saved value will be left untouched
9659    within the stack red-zone till return, as unwinders can find the same value
9660 in the register and on the stack. */
9661
9662 static void
9663 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9664 {
9665 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9666 return;
9667
9668 if (insn)
9669 {
9670 add_reg_note (insn, REG_CFA_RESTORE, reg);
9671 RTX_FRAME_RELATED_P (insn) = 1;
9672 }
9673 else
9674 queued_cfa_restores
9675 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9676 }
9677
9678 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9679
9680 static void
9681 ix86_add_queued_cfa_restore_notes (rtx insn)
9682 {
9683 rtx last;
9684 if (!queued_cfa_restores)
9685 return;
9686 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9687 ;
9688 XEXP (last, 1) = REG_NOTES (insn);
9689 REG_NOTES (insn) = queued_cfa_restores;
9690 queued_cfa_restores = NULL_RTX;
9691 RTX_FRAME_RELATED_P (insn) = 1;
9692 }
9693
9694 /* Expand prologue or epilogue stack adjustment.
9695    The pattern exists to put a dependency on all ebp-based memory accesses.
9696    STYLE should be negative if instructions should be marked as frame related,
9697    zero if the %r11 register is live and cannot be freely used, and positive
9698    otherwise.  */
9699
9700 static void
9701 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9702 int style, bool set_cfa)
9703 {
9704 struct machine_function *m = cfun->machine;
9705 rtx insn;
9706 bool add_frame_related_expr = false;
9707
9708 if (! TARGET_64BIT)
9709 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9710 else if (x86_64_immediate_operand (offset, DImode))
9711 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9712 else
9713 {
9714 rtx tmp;
9715 /* r11 is used by indirect sibcall return as well, set before the
9716 epilogue and used after the epilogue. */
9717 if (style)
9718 tmp = gen_rtx_REG (DImode, R11_REG);
9719 else
9720 {
9721 gcc_assert (src != hard_frame_pointer_rtx
9722 && dest != hard_frame_pointer_rtx);
9723 tmp = hard_frame_pointer_rtx;
9724 }
9725 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9726 if (style < 0)
9727 add_frame_related_expr = true;
9728
9729 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9730 }
9731
9732 insn = emit_insn (insn);
9733 if (style >= 0)
9734 ix86_add_queued_cfa_restore_notes (insn);
9735
9736 if (set_cfa)
9737 {
9738 rtx r;
9739
9740 gcc_assert (m->fs.cfa_reg == src);
9741 m->fs.cfa_offset += INTVAL (offset);
9742 m->fs.cfa_reg = dest;
9743
9744 r = gen_rtx_PLUS (Pmode, src, offset);
9745 r = gen_rtx_SET (VOIDmode, dest, r);
9746 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9747 RTX_FRAME_RELATED_P (insn) = 1;
9748 }
9749 else if (style < 0)
9750 {
9751 RTX_FRAME_RELATED_P (insn) = 1;
9752 if (add_frame_related_expr)
9753 {
9754 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9755 r = gen_rtx_SET (VOIDmode, dest, r);
9756 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9757 }
9758 }
9759
9760 if (dest == stack_pointer_rtx)
9761 {
9762 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9763 bool valid = m->fs.sp_valid;
9764
9765 if (src == hard_frame_pointer_rtx)
9766 {
9767 valid = m->fs.fp_valid;
9768 ooffset = m->fs.fp_offset;
9769 }
9770 else if (src == crtl->drap_reg)
9771 {
9772 valid = m->fs.drap_valid;
9773 ooffset = 0;
9774 }
9775 else
9776 {
9777 /* Else there are two possibilities: SP itself, which we set
9778    up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9779    taken care of by hand along the eh_return path.  */
9780 gcc_checking_assert (src == stack_pointer_rtx
9781 || offset == const0_rtx);
9782 }
9783
9784 m->fs.sp_offset = ooffset - INTVAL (offset);
9785 m->fs.sp_valid = valid;
9786 }
9787 }
9788
9789 /* Find an available register to be used as dynamic realign argument
9790    pointer register.  Such a register will be written in the prologue and
9791    used at the beginning of the body, so it must not be
9792 1. parameter passing register.
9793 2. GOT pointer.
9794 We reuse static-chain register if it is available. Otherwise, we
9795 use DI for i386 and R13 for x86-64. We chose R13 since it has
9796 shorter encoding.
9797
9798 Return: the regno of chosen register. */
9799
9800 static unsigned int
9801 find_drap_reg (void)
9802 {
9803 tree decl = cfun->decl;
9804
9805 if (TARGET_64BIT)
9806 {
9807 /* Use R13 for a nested function or a function that needs a static
9808    chain.  Since a function with a tail call may use any caller-saved
9809    registers in the epilogue, DRAP must not use a caller-saved
9810    register in that case.  */
9811 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9812 return R13_REG;
9813
9814 return R10_REG;
9815 }
9816 else
9817 {
9818 /* Use DI for a nested function or a function that needs a static
9819    chain.  Since a function with a tail call may use any caller-saved
9820    registers in the epilogue, DRAP must not use a caller-saved
9821    register in that case.  */
9822 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9823 return DI_REG;
9824
9825 /* Reuse static chain register if it isn't used for parameter
9826 passing. */
9827 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9828 {
9829 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9830 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9831 return CX_REG;
9832 }
9833 return DI_REG;
9834 }
9835 }
9836
9837 /* Return minimum incoming stack alignment. */
9838
9839 static unsigned int
9840 ix86_minimum_incoming_stack_boundary (bool sibcall)
9841 {
9842 unsigned int incoming_stack_boundary;
9843
9844 /* Prefer the one specified at command line. */
9845 if (ix86_user_incoming_stack_boundary)
9846 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9847 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9848    when -mstackrealign is used, this is not a sibcall check, and the
9849    estimated stack alignment is 128 bits.  */
9850 else if (!sibcall
9851 && !TARGET_64BIT
9852 && ix86_force_align_arg_pointer
9853 && crtl->stack_alignment_estimated == 128)
9854 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9855 else
9856 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9857
9858 /* Incoming stack alignment can be changed on individual functions
9859 via force_align_arg_pointer attribute. We use the smallest
9860 incoming stack boundary. */
9861 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9862 && lookup_attribute (ix86_force_align_arg_pointer_string,
9863 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9864 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9865
9866 /* The incoming stack frame has to be aligned at least at
9867 parm_stack_boundary. */
9868 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9869 incoming_stack_boundary = crtl->parm_stack_boundary;
9870
9871 /* Stack at entrance of main is aligned by runtime. We use the
9872 smallest incoming stack boundary. */
9873 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9874 && DECL_NAME (current_function_decl)
9875 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9876 && DECL_FILE_SCOPE_P (current_function_decl))
9877 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9878
9879 return incoming_stack_boundary;
9880 }
9881
9882 /* Update incoming stack boundary and estimated stack alignment. */
9883
9884 static void
9885 ix86_update_stack_boundary (void)
9886 {
9887 ix86_incoming_stack_boundary
9888 = ix86_minimum_incoming_stack_boundary (false);
9889
9890 /* x86_64 vararg needs 16byte stack alignment for register save
9891 area. */
9892 if (TARGET_64BIT
9893 && cfun->stdarg
9894 && crtl->stack_alignment_estimated < 128)
9895 crtl->stack_alignment_estimated = 128;
9896 }
9897
9898 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9899 needed or an rtx for DRAP otherwise. */
9900
9901 static rtx
9902 ix86_get_drap_rtx (void)
9903 {
9904 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9905 crtl->need_drap = true;
9906
9907 if (stack_realign_drap)
9908 {
9909 /* Assign DRAP to vDRAP and return vDRAP.  */
9910 unsigned int regno = find_drap_reg ();
9911 rtx drap_vreg;
9912 rtx arg_ptr;
9913 rtx seq, insn;
9914
9915 arg_ptr = gen_rtx_REG (Pmode, regno);
9916 crtl->drap_reg = arg_ptr;
9917
9918 start_sequence ();
9919 drap_vreg = copy_to_reg (arg_ptr);
9920 seq = get_insns ();
9921 end_sequence ();
9922
9923 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9924 if (!optimize)
9925 {
9926 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9927 RTX_FRAME_RELATED_P (insn) = 1;
9928 }
9929 return drap_vreg;
9930 }
9931 else
9932 return NULL;
9933 }
9934
9935 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9936
9937 static rtx
9938 ix86_internal_arg_pointer (void)
9939 {
9940 return virtual_incoming_args_rtx;
9941 }
9942
9943 struct scratch_reg {
9944 rtx reg;
9945 bool saved;
9946 };
9947
9948 /* Return a short-lived scratch register for use on function entry.
9949 In 32-bit mode, it is valid only after the registers are saved
9950 in the prologue. This register must be released by means of
9951 release_scratch_register_on_entry once it is dead. */
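/* A sketch of the selection order implemented below for 32-bit code:
   prefer a call-clobbered register that is not used for argument passing,
   the static chain or DRAP (%eax, then %edx, then %ecx), then a
   callee-saved register that the prologue saves anyway (%ebx, %esi, %edi),
   and as a last resort push/pop a scratch register around its use.  */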
9952
9953 static void
9954 get_scratch_register_on_entry (struct scratch_reg *sr)
9955 {
9956 int regno;
9957
9958 sr->saved = false;
9959
9960 if (TARGET_64BIT)
9961 {
9962 /* We always use R11 in 64-bit mode. */
9963 regno = R11_REG;
9964 }
9965 else
9966 {
9967 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9968 bool fastcall_p
9969 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9970 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9971 int regparm = ix86_function_regparm (fntype, decl);
9972 int drap_regno
9973 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9974
9975 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9976 for the static chain register. */
9977 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9978 && drap_regno != AX_REG)
9979 regno = AX_REG;
9980 else if (regparm < 2 && drap_regno != DX_REG)
9981 regno = DX_REG;
9982 /* ecx is the static chain register. */
9983 else if (regparm < 3 && !fastcall_p && !static_chain_p
9984 && drap_regno != CX_REG)
9985 regno = CX_REG;
9986 else if (ix86_save_reg (BX_REG, true))
9987 regno = BX_REG;
9988 /* esi is the static chain register. */
9989 else if (!(regparm == 3 && static_chain_p)
9990 && ix86_save_reg (SI_REG, true))
9991 regno = SI_REG;
9992 else if (ix86_save_reg (DI_REG, true))
9993 regno = DI_REG;
9994 else
9995 {
9996 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9997 sr->saved = true;
9998 }
9999 }
10000
10001 sr->reg = gen_rtx_REG (Pmode, regno);
10002 if (sr->saved)
10003 {
10004 rtx insn = emit_insn (gen_push (sr->reg));
10005 RTX_FRAME_RELATED_P (insn) = 1;
10006 }
10007 }
10008
10009 /* Release a scratch register obtained from the preceding function. */
10010
10011 static void
10012 release_scratch_register_on_entry (struct scratch_reg *sr)
10013 {
10014 if (sr->saved)
10015 {
10016 rtx x, insn = emit_insn (gen_pop (sr->reg));
10017
10018 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10019 RTX_FRAME_RELATED_P (insn) = 1;
10020 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10021 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10022 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10023 }
10024 }
10025
10026 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10027
10028 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10029
10030 static void
10031 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10032 {
10033 /* We skip the probe for the first interval + a small dope of 4 words and
10034 probe that many bytes past the specified size to maintain a protection
10035    area at the bottom of the stack.  */
10036 const int dope = 4 * UNITS_PER_WORD;
10037 rtx size_rtx = GEN_INT (size), last;
10038
10039 /* See if we have a constant small number of probes to generate. If so,
10040 that's the easy case. The run-time loop is made up of 11 insns in the
10041 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10042 for n # of intervals. */
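/* As a worked example (assuming the default PROBE_INTERVAL of 4096 bytes
   and a 32-bit word size, so a dope of 16 bytes): for SIZE == 10000 the
   unrolled case below adjusts by 2*4096 + 16 bytes and probes, adjusts by
   4096 and probes, adjusts by 10000 + 4096 - 12288 == 1808 and probes,
   then re-adjusts upwards by 4096 + 16, for a net allocation of exactly
   10000 bytes.  */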
10043 if (size <= 5 * PROBE_INTERVAL)
10044 {
10045 HOST_WIDE_INT i, adjust;
10046 bool first_probe = true;
10047
10048 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10049 values of N from 1 until it exceeds SIZE. If only one probe is
10050 needed, this will not generate any code. Then adjust and probe
10051 to PROBE_INTERVAL + SIZE. */
10052 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10053 {
10054 if (first_probe)
10055 {
10056 adjust = 2 * PROBE_INTERVAL + dope;
10057 first_probe = false;
10058 }
10059 else
10060 adjust = PROBE_INTERVAL;
10061
10062 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10063 plus_constant (stack_pointer_rtx, -adjust)));
10064 emit_stack_probe (stack_pointer_rtx);
10065 }
10066
10067 if (first_probe)
10068 adjust = size + PROBE_INTERVAL + dope;
10069 else
10070 adjust = size + PROBE_INTERVAL - i;
10071
10072 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10073 plus_constant (stack_pointer_rtx, -adjust)));
10074 emit_stack_probe (stack_pointer_rtx);
10075
10076 /* Adjust back to account for the additional first interval. */
10077 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10078 plus_constant (stack_pointer_rtx,
10079 PROBE_INTERVAL + dope)));
10080 }
10081
10082 /* Otherwise, do the same as above, but in a loop. Note that we must be
10083 extra careful with variables wrapping around because we might be at
10084 the very top (or the very bottom) of the address space and we have
10085 to be able to handle this case properly; in particular, we use an
10086 equality test for the loop condition. */
10087 else
10088 {
10089 HOST_WIDE_INT rounded_size;
10090 struct scratch_reg sr;
10091
10092 get_scratch_register_on_entry (&sr);
10093
10094
10095 /* Step 1: round SIZE to the previous multiple of the interval. */
10096
10097 rounded_size = size & -PROBE_INTERVAL;
10098
10099
10100 /* Step 2: compute initial and final value of the loop counter. */
10101
10102 /* SP = SP_0 + PROBE_INTERVAL. */
10103 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10104 plus_constant (stack_pointer_rtx,
10105 - (PROBE_INTERVAL + dope))));
10106
10107 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10108 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10109 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10110 gen_rtx_PLUS (Pmode, sr.reg,
10111 stack_pointer_rtx)));
10112
10113
10114 /* Step 3: the loop
10115
10116 while (SP != LAST_ADDR)
10117 {
10118 SP = SP + PROBE_INTERVAL
10119 probe at SP
10120 }
10121
10122 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10123 values of N from 1 until it is equal to ROUNDED_SIZE. */
10124
10125 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10126
10127
10128 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10129 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10130
10131 if (size != rounded_size)
10132 {
10133 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10134 plus_constant (stack_pointer_rtx,
10135 rounded_size - size)));
10136 emit_stack_probe (stack_pointer_rtx);
10137 }
10138
10139 /* Adjust back to account for the additional first interval. */
10140 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10141 plus_constant (stack_pointer_rtx,
10142 PROBE_INTERVAL + dope)));
10143
10144 release_scratch_register_on_entry (&sr);
10145 }
10146
10147 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10148
10149 /* Even if the stack pointer isn't the CFA register, we need to correctly
10150 describe the adjustments made to it, in particular differentiate the
10151 frame-related ones from the frame-unrelated ones. */
10152 if (size > 0)
10153 {
10154 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10155 XVECEXP (expr, 0, 0)
10156 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10157 plus_constant (stack_pointer_rtx, -size));
10158 XVECEXP (expr, 0, 1)
10159 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10160 plus_constant (stack_pointer_rtx,
10161 PROBE_INTERVAL + dope + size));
10162 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10163 RTX_FRAME_RELATED_P (last) = 1;
10164
10165 cfun->machine->fs.sp_offset += size;
10166 }
10167
10168 /* Make sure nothing is scheduled before we are done. */
10169 emit_insn (gen_blockage ());
10170 }
10171
10172 /* Adjust the stack pointer up to REG while probing it. */
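/* A sketch of the 32-bit loop emitted below (assuming a 4096-byte
   PROBE_INTERVAL and %eax holding LAST_ADDR):

   .LPSRL0:
	cmpl	%eax, %esp
	je	.LPSRE0
	subl	$4096, %esp
	orl	$0, (%esp)
	jmp	.LPSRL0
   .LPSRE0:  */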
10173
10174 const char *
10175 output_adjust_stack_and_probe (rtx reg)
10176 {
10177 static int labelno = 0;
10178 char loop_lab[32], end_lab[32];
10179 rtx xops[2];
10180
10181 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10182 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10183
10184 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10185
10186 /* Jump to END_LAB if SP == LAST_ADDR. */
10187 xops[0] = stack_pointer_rtx;
10188 xops[1] = reg;
10189 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10190 fputs ("\tje\t", asm_out_file);
10191 assemble_name_raw (asm_out_file, end_lab);
10192 fputc ('\n', asm_out_file);
10193
10194 /* SP = SP + PROBE_INTERVAL. */
10195 xops[1] = GEN_INT (PROBE_INTERVAL);
10196 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10197
10198 /* Probe at SP. */
10199 xops[1] = const0_rtx;
10200 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10201
10202 fprintf (asm_out_file, "\tjmp\t");
10203 assemble_name_raw (asm_out_file, loop_lab);
10204 fputc ('\n', asm_out_file);
10205
10206 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10207
10208 return "";
10209 }
10210
10211 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10212 inclusive. These are offsets from the current stack pointer. */
10213
10214 static void
10215 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10216 {
10217 /* See if we have a constant small number of probes to generate. If so,
10218 that's the easy case. The run-time loop is made up of 7 insns in the
10219 generic case while the compile-time loop is made up of n insns for n #
10220 of intervals. */
10221 if (size <= 7 * PROBE_INTERVAL)
10222 {
10223 HOST_WIDE_INT i;
10224
10225 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10226 it exceeds SIZE. If only one probe is needed, this will not
10227 generate any code. Then probe at FIRST + SIZE. */
10228 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10229 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
10230
10231 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
10232 }
10233
10234 /* Otherwise, do the same as above, but in a loop. Note that we must be
10235 extra careful with variables wrapping around because we might be at
10236 the very top (or the very bottom) of the address space and we have
10237 to be able to handle this case properly; in particular, we use an
10238 equality test for the loop condition. */
10239 else
10240 {
10241 HOST_WIDE_INT rounded_size, last;
10242 struct scratch_reg sr;
10243
10244 get_scratch_register_on_entry (&sr);
10245
10246
10247 /* Step 1: round SIZE to the previous multiple of the interval. */
10248
10249 rounded_size = size & -PROBE_INTERVAL;
10250
10251
10252 /* Step 2: compute initial and final value of the loop counter. */
10253
10254 /* TEST_OFFSET = FIRST. */
10255 emit_move_insn (sr.reg, GEN_INT (-first));
10256
10257 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10258 last = first + rounded_size;
10259
10260
10261 /* Step 3: the loop
10262
10263 while (TEST_ADDR != LAST_ADDR)
10264 {
10265 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10266 probe at TEST_ADDR
10267 }
10268
10269 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10270 until it is equal to ROUNDED_SIZE. */
10271
10272 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10273
10274
10275 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10276 that SIZE is equal to ROUNDED_SIZE. */
10277
10278 if (size != rounded_size)
10279 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
10280 stack_pointer_rtx,
10281 sr.reg),
10282 rounded_size - size));
10283
10284 release_scratch_register_on_entry (&sr);
10285 }
10286
10287 /* Make sure nothing is scheduled before we are done. */
10288 emit_insn (gen_blockage ());
10289 }
10290
10291 /* Probe a range of stack addresses from REG to END, inclusive. These are
10292 offsets from the current stack pointer. */
10293
10294 const char *
10295 output_probe_stack_range (rtx reg, rtx end)
10296 {
10297 static int labelno = 0;
10298 char loop_lab[32], end_lab[32];
10299 rtx xops[3];
10300
10301 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10302 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10303
10304 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10305
10306 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10307 xops[0] = reg;
10308 xops[1] = end;
10309 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10310 fputs ("\tje\t", asm_out_file);
10311 assemble_name_raw (asm_out_file, end_lab);
10312 fputc ('\n', asm_out_file);
10313
10314 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10315 xops[1] = GEN_INT (PROBE_INTERVAL);
10316 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10317
10318 /* Probe at TEST_ADDR. */
10319 xops[0] = stack_pointer_rtx;
10320 xops[1] = reg;
10321 xops[2] = const0_rtx;
10322 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10323
10324 fprintf (asm_out_file, "\tjmp\t");
10325 assemble_name_raw (asm_out_file, loop_lab);
10326 fputc ('\n', asm_out_file);
10327
10328 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10329
10330 return "";
10331 }
10332
10333 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10334 to be generated in correct form. */
10335 static void
10336 ix86_finalize_stack_realign_flags (void)
10337 {
10338 /* Check if stack realignment is really needed after reload, and
10339    store the result in cfun.  */
10340 unsigned int incoming_stack_boundary
10341 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10342 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10343 unsigned int stack_realign = (incoming_stack_boundary
10344 < (current_function_is_leaf
10345 ? crtl->max_used_stack_slot_alignment
10346 : crtl->stack_alignment_needed));
10347
10348 if (crtl->stack_realign_finalized)
10349 {
10350 /* After stack_realign_needed is finalized, we can't no longer
10351 change it. */
10352 gcc_assert (crtl->stack_realign_needed == stack_realign);
10353 }
10354 else
10355 {
10356 crtl->stack_realign_needed = stack_realign;
10357 crtl->stack_realign_finalized = true;
10358 }
10359 }
10360
10361 /* Expand the prologue into a bunch of separate insns. */
10362
10363 void
10364 ix86_expand_prologue (void)
10365 {
10366 struct machine_function *m = cfun->machine;
10367 rtx insn, t;
10368 bool pic_reg_used;
10369 struct ix86_frame frame;
10370 HOST_WIDE_INT allocate;
10371 bool int_registers_saved;
10372
10373 ix86_finalize_stack_realign_flags ();
10374
10375 /* DRAP should not coexist with stack_realign_fp */
10376 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10377
10378 memset (&m->fs, 0, sizeof (m->fs));
10379
10380 /* Initialize CFA state for before the prologue. */
10381 m->fs.cfa_reg = stack_pointer_rtx;
10382 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10383
10384 /* Track SP offset to the CFA. We continue tracking this after we've
10385 swapped the CFA register away from SP. In the case of re-alignment
10386    this is fudged; we're interested in offsets within the local frame.  */
10387 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10388 m->fs.sp_valid = true;
10389
10390 ix86_compute_frame_layout (&frame);
10391
10392 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10393 {
10394 /* We should have already generated an error for any use of
10395 ms_hook on a nested function. */
10396 gcc_checking_assert (!ix86_static_chain_on_stack);
10397
10398 /* Check if profiling is active and we shall use the profiling-before-prologue
10399    variant.  If so, sorry.  */
10400 if (crtl->profile && flag_fentry != 0)
10401 sorry ("ms_hook_prologue attribute isn%'t compatible "
10402 "with -mfentry for 32-bit");
10403
10404 /* In ix86_asm_output_function_label we emitted:
10405 8b ff movl.s %edi,%edi
10406 55 push %ebp
10407 8b ec movl.s %esp,%ebp
10408
10409 This matches the hookable function prologue in Win32 API
10410 functions in Microsoft Windows XP Service Pack 2 and newer.
10411 Wine uses this to enable Windows apps to hook the Win32 API
10412 functions provided by Wine.
10413
10414 What that means is that we've already set up the frame pointer. */
10415
10416 if (frame_pointer_needed
10417 && !(crtl->drap_reg && crtl->stack_realign_needed))
10418 {
10419 rtx push, mov;
10420
10421 /* We've decided to use the frame pointer already set up.
10422 Describe this to the unwinder by pretending that both
10423 push and mov insns happen right here.
10424
10425 Putting the unwind info here at the end of the ms_hook
10426 is done so that we can make absolutely certain we get
10427 the required byte sequence at the start of the function,
10428 rather than relying on an assembler that can produce
10429 the exact encoding required.
10430
10431 However it does mean (in the unpatched case) that we have
10432 a 1 insn window where the asynchronous unwind info is
10433 incorrect. However, if we placed the unwind info at
10434 its correct location we would have incorrect unwind info
10435 in the patched case. Which is probably all moot since
10436 I don't expect Wine generates dwarf2 unwind info for the
10437 system libraries that use this feature. */
10438
10439 insn = emit_insn (gen_blockage ());
10440
10441 push = gen_push (hard_frame_pointer_rtx);
10442 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10443 stack_pointer_rtx);
10444 RTX_FRAME_RELATED_P (push) = 1;
10445 RTX_FRAME_RELATED_P (mov) = 1;
10446
10447 RTX_FRAME_RELATED_P (insn) = 1;
10448 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10449 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10450
10451 /* Note that gen_push incremented m->fs.cfa_offset, even
10452 though we didn't emit the push insn here. */
10453 m->fs.cfa_reg = hard_frame_pointer_rtx;
10454 m->fs.fp_offset = m->fs.cfa_offset;
10455 m->fs.fp_valid = true;
10456 }
10457 else
10458 {
10459 /* The frame pointer is not needed so pop %ebp again.
10460 This leaves us with a pristine state. */
10461 emit_insn (gen_pop (hard_frame_pointer_rtx));
10462 }
10463 }
10464
10465 /* The first insn of a function that accepts its static chain on the
10466 stack is to push the register that would be filled in by a direct
10467 call. This insn will be skipped by the trampoline. */
10468 else if (ix86_static_chain_on_stack)
10469 {
10470 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10471 emit_insn (gen_blockage ());
10472
10473 /* We don't want to interpret this push insn as a register save,
10474 only as a stack adjustment. The real copy of the register as
10475 a save will be done later, if needed. */
10476 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10477 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10478 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10479 RTX_FRAME_RELATED_P (insn) = 1;
10480 }
10481
10482   /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10483      DRAP is needed and stack realignment is really needed after reload.  */
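  /* For illustration only: assuming a 32-byte alignment request, %ecx as
     the DRAP register, and nothing pushed beforehand, the block below
     emits roughly
	   leal  4(%esp), %ecx     # ecx = incoming argument pointer
	   andl  $-32, %esp        # align the stack
	   pushl -4(%ecx)          # replicate the return address
     after which the frame is laid out from the new, aligned %esp.  */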
10484 if (stack_realign_drap)
10485 {
10486 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10487
10488 /* Only need to push parameter pointer reg if it is caller saved. */
10489 if (!call_used_regs[REGNO (crtl->drap_reg)])
10490 {
10491 /* Push arg pointer reg */
10492 insn = emit_insn (gen_push (crtl->drap_reg));
10493 RTX_FRAME_RELATED_P (insn) = 1;
10494 }
10495
10496 /* Grab the argument pointer. */
10497 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10498 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10499 RTX_FRAME_RELATED_P (insn) = 1;
10500 m->fs.cfa_reg = crtl->drap_reg;
10501 m->fs.cfa_offset = 0;
10502
10503 /* Align the stack. */
10504 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10505 stack_pointer_rtx,
10506 GEN_INT (-align_bytes)));
10507 RTX_FRAME_RELATED_P (insn) = 1;
10508
10509 /* Replicate the return address on the stack so that return
10510 address can be reached via (argp - 1) slot. This is needed
10511 to implement macro RETURN_ADDR_RTX and intrinsic function
10512 expand_builtin_return_addr etc. */
10513 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10514 t = gen_frame_mem (Pmode, t);
10515 insn = emit_insn (gen_push (t));
10516 RTX_FRAME_RELATED_P (insn) = 1;
10517
10518 /* For the purposes of frame and register save area addressing,
10519 we've started over with a new frame. */
10520 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10521 m->fs.realigned = true;
10522 }
10523
10524 if (frame_pointer_needed && !m->fs.fp_valid)
10525 {
10526 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10527 slower on all targets. Also sdb doesn't like it. */
10528 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10529 RTX_FRAME_RELATED_P (insn) = 1;
10530
10531 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10532 {
10533 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10534 RTX_FRAME_RELATED_P (insn) = 1;
10535
10536 if (m->fs.cfa_reg == stack_pointer_rtx)
10537 m->fs.cfa_reg = hard_frame_pointer_rtx;
10538 m->fs.fp_offset = m->fs.sp_offset;
10539 m->fs.fp_valid = true;
10540 }
10541 }
10542
10543 int_registers_saved = (frame.nregs == 0);
10544
10545 if (!int_registers_saved)
10546 {
10547 /* If saving registers via PUSH, do so now. */
10548 if (!frame.save_regs_using_mov)
10549 {
10550 ix86_emit_save_regs ();
10551 int_registers_saved = true;
10552 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10553 }
10554
10555       /* When using the red zone we may start register saving before allocating
10556 	 the stack frame, saving one cycle of the prologue.  However, avoid
10557 doing this if we have to probe the stack; at least on x86_64 the
10558 stack probe can turn into a call that clobbers a red zone location. */
10559 else if (ix86_using_red_zone ()
10560 && (! TARGET_STACK_PROBE
10561 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10562 {
10563 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10564 int_registers_saved = true;
10565 }
10566 }
10567
10568 if (stack_realign_fp)
10569 {
10570 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10571 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10572
10573 /* The computation of the size of the re-aligned stack frame means
10574 that we must allocate the size of the register save area before
10575 performing the actual alignment. Otherwise we cannot guarantee
10576 that there's enough storage above the realignment point. */
10577 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10578 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10579 GEN_INT (m->fs.sp_offset
10580 - frame.sse_reg_save_offset),
10581 -1, false);
10582
10583 /* Align the stack. */
10584 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10585 stack_pointer_rtx,
10586 GEN_INT (-align_bytes)));
10587
10588 /* For the purposes of register save area addressing, the stack
10589 pointer is no longer valid. As for the value of sp_offset,
10590 see ix86_compute_frame_layout, which we need to match in order
10591 to pass verification of stack_pointer_offset at the end. */
10592 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10593 m->fs.sp_valid = false;
10594 }
10595
10596 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10597
10598 if (flag_stack_usage)
10599 {
10600 /* We start to count from ARG_POINTER. */
10601 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10602
10603 /* If it was realigned, take into account the fake frame. */
10604 if (stack_realign_drap)
10605 {
10606 if (ix86_static_chain_on_stack)
10607 stack_size += UNITS_PER_WORD;
10608
10609 if (!call_used_regs[REGNO (crtl->drap_reg)])
10610 stack_size += UNITS_PER_WORD;
10611
10612 /* This over-estimates by 1 minimal-stack-alignment-unit but
10613 mitigates that by counting in the new return address slot. */
10614 current_function_dynamic_stack_size
10615 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10616 }
10617
10618 current_function_static_stack_size = stack_size;
10619 }
10620
10621 /* The stack has already been decremented by the instruction calling us
10622 so we need to probe unconditionally to preserve the protection area. */
10623 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10624 {
10625 /* We expect the registers to be saved when probes are used. */
10626 gcc_assert (int_registers_saved);
10627
10628 if (STACK_CHECK_MOVING_SP)
10629 {
10630 ix86_adjust_stack_and_probe (allocate);
10631 allocate = 0;
10632 }
10633 else
10634 {
10635 HOST_WIDE_INT size = allocate;
10636
10637 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10638 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10639
10640 if (TARGET_STACK_PROBE)
10641 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10642 else
10643 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10644 }
10645 }
10646
10647 if (allocate == 0)
10648 ;
10649 else if (!ix86_target_stack_probe ()
10650 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10651 {
10652 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10653 GEN_INT (-allocate), -1,
10654 m->fs.cfa_reg == stack_pointer_rtx);
10655 }
10656 else
10657 {
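      /* A rough, illustrative sketch of what this branch emits on a 64-bit
	 stack-probing (Windows-style) target when neither %rax nor %r10 is
	 live at function entry:
	       mov   $allocate, %rax
	       call  <stack probe helper>    # probes the new pages
	       sub   %rax, %rsp              # actual allocation
	 If %rax or %r10 does carry a value (an argument or the static
	 chain), it is pushed first and reloaded from the frame below.  */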
10658 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10659 rtx r10 = NULL;
10660 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10661
10662 bool eax_live = false;
10663 bool r10_live = false;
10664
10665 if (TARGET_64BIT)
10666 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10667 if (!TARGET_64BIT_MS_ABI)
10668 eax_live = ix86_eax_live_at_start_p ();
10669
10670 if (eax_live)
10671 {
10672 emit_insn (gen_push (eax));
10673 allocate -= UNITS_PER_WORD;
10674 }
10675 if (r10_live)
10676 {
10677 r10 = gen_rtx_REG (Pmode, R10_REG);
10678 emit_insn (gen_push (r10));
10679 allocate -= UNITS_PER_WORD;
10680 }
10681
10682 emit_move_insn (eax, GEN_INT (allocate));
10683 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10684
10685 /* Use the fact that AX still contains ALLOCATE. */
10686 adjust_stack_insn = (TARGET_64BIT
10687 ? gen_pro_epilogue_adjust_stack_di_sub
10688 : gen_pro_epilogue_adjust_stack_si_sub);
10689
10690 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10691 stack_pointer_rtx, eax));
10692
10693 /* Note that SEH directives need to continue tracking the stack
10694 pointer even after the frame pointer has been set up. */
10695 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10696 {
10697 if (m->fs.cfa_reg == stack_pointer_rtx)
10698 m->fs.cfa_offset += allocate;
10699
10700 RTX_FRAME_RELATED_P (insn) = 1;
10701 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10702 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10703 plus_constant (stack_pointer_rtx,
10704 -allocate)));
10705 }
10706 m->fs.sp_offset += allocate;
10707
10708 if (r10_live && eax_live)
10709 {
10710 t = choose_baseaddr (m->fs.sp_offset - allocate);
10711 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10712 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10713 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10714 }
10715 else if (eax_live || r10_live)
10716 {
10717 t = choose_baseaddr (m->fs.sp_offset - allocate);
10718 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10719 }
10720 }
10721 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10722
10723   /* If we haven't already set up the frame pointer, do so now.  */
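  /* Note (explanatory sketch): the frame pointer is materialised with an
     add (typically printed as an lea, e.g. "leaq <offset>(%rsp), %rbp")
     rather than a plain move, because at this point the stack pointer has
     already been decremented past where the frame pointer should sit; the
     offset is the distance computed in the code below.  */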
10724 if (frame_pointer_needed && !m->fs.fp_valid)
10725 {
10726 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10727 GEN_INT (frame.stack_pointer_offset
10728 - frame.hard_frame_pointer_offset));
10729 insn = emit_insn (insn);
10730 RTX_FRAME_RELATED_P (insn) = 1;
10731 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10732
10733 if (m->fs.cfa_reg == stack_pointer_rtx)
10734 m->fs.cfa_reg = hard_frame_pointer_rtx;
10735 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10736 m->fs.fp_valid = true;
10737 }
10738
10739 if (!int_registers_saved)
10740 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10741 if (frame.nsseregs)
10742 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10743
10744 pic_reg_used = false;
10745 if (pic_offset_table_rtx
10746 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10747 || crtl->profile))
10748 {
10749 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10750
10751 if (alt_pic_reg_used != INVALID_REGNUM)
10752 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10753
10754 pic_reg_used = true;
10755 }
10756
10757 if (pic_reg_used)
10758 {
10759 if (TARGET_64BIT)
10760 {
10761 if (ix86_cmodel == CM_LARGE_PIC)
10762 {
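	      /* Roughly (register choice illustrative; the PIC register is
		 usually %rbx):
		   .L1: leaq    .L1(%rip), %rbx
			movabsq $_GLOBAL_OFFSET_TABLE_-.L1, %r11
			addq    %r11, %rbx  */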
10763 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10764 rtx label = gen_label_rtx ();
10765 emit_label (label);
10766 LABEL_PRESERVE_P (label) = 1;
10767 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10768 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10769 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10770 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10771 pic_offset_table_rtx, tmp_reg));
10772 }
10773 else
10774 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10775 }
10776 else
10777 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10778 }
10779
10780 /* In the pic_reg_used case, make sure that the got load isn't deleted
10781 when mcount needs it. Blockage to avoid call movement across mcount
10782 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10783 note. */
10784 if (crtl->profile && !flag_fentry && pic_reg_used)
10785 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10786
10787 if (crtl->drap_reg && !crtl->stack_realign_needed)
10788 {
10789       /* vDRAP was set up, but after reload it turns out that stack
10790 	 realignment isn't necessary.  Here we emit prologue code to set
10791 	 up DRAP without the stack realignment adjustment.  */
10792 t = choose_baseaddr (0);
10793 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10794 }
10795
10796 /* Prevent instructions from being scheduled into register save push
10797 sequence when access to the redzone area is done through frame pointer.
10798 The offset between the frame pointer and the stack pointer is calculated
10799 relative to the value of the stack pointer at the end of the function
10800 prologue, and moving instructions that access redzone area via frame
10801 pointer inside push sequence violates this assumption. */
10802 if (frame_pointer_needed && frame.red_zone_size)
10803 emit_insn (gen_memory_blockage ());
10804
10805 /* Emit cld instruction if stringops are used in the function. */
10806 if (TARGET_CLD && ix86_current_function_needs_cld)
10807 emit_insn (gen_cld ());
10808
10809 /* SEH requires that the prologue end within 256 bytes of the start of
10810 the function. Prevent instruction schedules that would extend that. */
10811 if (TARGET_SEH)
10812 emit_insn (gen_blockage ());
10813 }
10814
10815 /* Emit code to restore REG using a POP insn. */
10816
10817 static void
10818 ix86_emit_restore_reg_using_pop (rtx reg)
10819 {
10820 struct machine_function *m = cfun->machine;
10821 rtx insn = emit_insn (gen_pop (reg));
10822
10823 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10824 m->fs.sp_offset -= UNITS_PER_WORD;
10825
10826 if (m->fs.cfa_reg == crtl->drap_reg
10827 && REGNO (reg) == REGNO (crtl->drap_reg))
10828 {
10829 /* Previously we'd represented the CFA as an expression
10830 like *(%ebp - 8). We've just popped that value from
10831 the stack, which means we need to reset the CFA to
10832 the drap register. This will remain until we restore
10833 the stack pointer. */
10834 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10835 RTX_FRAME_RELATED_P (insn) = 1;
10836
10837 /* This means that the DRAP register is valid for addressing too. */
10838 m->fs.drap_valid = true;
10839 return;
10840 }
10841
10842 if (m->fs.cfa_reg == stack_pointer_rtx)
10843 {
10844 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10845 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10846 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10847 RTX_FRAME_RELATED_P (insn) = 1;
10848
10849 m->fs.cfa_offset -= UNITS_PER_WORD;
10850 }
10851
10852 /* When the frame pointer is the CFA, and we pop it, we are
10853 swapping back to the stack pointer as the CFA. This happens
10854 for stack frames that don't allocate other data, so we assume
10855 the stack pointer is now pointing at the return address, i.e.
10856 the function entry state, which makes the offset be 1 word. */
10857 if (reg == hard_frame_pointer_rtx)
10858 {
10859 m->fs.fp_valid = false;
10860 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10861 {
10862 m->fs.cfa_reg = stack_pointer_rtx;
10863 m->fs.cfa_offset -= UNITS_PER_WORD;
10864
10865 add_reg_note (insn, REG_CFA_DEF_CFA,
10866 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10867 GEN_INT (m->fs.cfa_offset)));
10868 RTX_FRAME_RELATED_P (insn) = 1;
10869 }
10870 }
10871 }
10872
10873 /* Emit code to restore saved registers using POP insns. */
10874
10875 static void
10876 ix86_emit_restore_regs_using_pop (void)
10877 {
10878 unsigned int regno;
10879
10880 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10881 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10882 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10883 }
10884
10885 /* Emit code and notes for the LEAVE instruction. */
10886
10887 static void
10888 ix86_emit_leave (void)
10889 {
10890 struct machine_function *m = cfun->machine;
10891 rtx insn = emit_insn (ix86_gen_leave ());
10892
10893 ix86_add_queued_cfa_restore_notes (insn);
10894
10895 gcc_assert (m->fs.fp_valid);
10896 m->fs.sp_valid = true;
10897 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10898 m->fs.fp_valid = false;
10899
10900 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10901 {
10902 m->fs.cfa_reg = stack_pointer_rtx;
10903 m->fs.cfa_offset = m->fs.sp_offset;
10904
10905 add_reg_note (insn, REG_CFA_DEF_CFA,
10906 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10907 RTX_FRAME_RELATED_P (insn) = 1;
10908 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10909 m->fs.fp_offset);
10910 }
10911 }
10912
10913 /* Emit code to restore saved registers using MOV insns.
10914 First register is restored from CFA - CFA_OFFSET. */
10915 static void
10916 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10917 int maybe_eh_return)
10918 {
10919 struct machine_function *m = cfun->machine;
10920 unsigned int regno;
10921
10922 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10923 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10924 {
10925 rtx reg = gen_rtx_REG (Pmode, regno);
10926 rtx insn, mem;
10927
10928 mem = choose_baseaddr (cfa_offset);
10929 mem = gen_frame_mem (Pmode, mem);
10930 insn = emit_move_insn (reg, mem);
10931
10932 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10933 {
10934 /* Previously we'd represented the CFA as an expression
10935 like *(%ebp - 8). We've just popped that value from
10936 the stack, which means we need to reset the CFA to
10937 the drap register. This will remain until we restore
10938 the stack pointer. */
10939 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10940 RTX_FRAME_RELATED_P (insn) = 1;
10941
10942 /* This means that the DRAP register is valid for addressing. */
10943 m->fs.drap_valid = true;
10944 }
10945 else
10946 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10947
10948 cfa_offset -= UNITS_PER_WORD;
10949 }
10950 }
10951
10952 /* Emit code to restore saved registers using MOV insns.
10953 First register is restored from CFA - CFA_OFFSET. */
10954 static void
10955 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10956 int maybe_eh_return)
10957 {
10958 unsigned int regno;
10959
10960 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10961 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10962 {
10963 rtx reg = gen_rtx_REG (V4SFmode, regno);
10964 rtx mem;
10965
10966 mem = choose_baseaddr (cfa_offset);
10967 mem = gen_rtx_MEM (V4SFmode, mem);
10968 set_mem_align (mem, 128);
10969 emit_move_insn (reg, mem);
10970
10971 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10972
10973 cfa_offset -= 16;
10974 }
10975 }
10976
10977 /* Restore function stack, frame, and registers. */
10978
10979 void
10980 ix86_expand_epilogue (int style)
10981 {
10982 struct machine_function *m = cfun->machine;
10983 struct machine_frame_state frame_state_save = m->fs;
10984 struct ix86_frame frame;
10985 bool restore_regs_via_mov;
10986 bool using_drap;
10987
10988 ix86_finalize_stack_realign_flags ();
10989 ix86_compute_frame_layout (&frame);
10990
10991 m->fs.sp_valid = (!frame_pointer_needed
10992 || (current_function_sp_is_unchanging
10993 && !stack_realign_fp));
10994 gcc_assert (!m->fs.sp_valid
10995 || m->fs.sp_offset == frame.stack_pointer_offset);
10996
10997 /* The FP must be valid if the frame pointer is present. */
10998 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10999 gcc_assert (!m->fs.fp_valid
11000 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11001
11002 /* We must have *some* valid pointer to the stack frame. */
11003 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11004
11005 /* The DRAP is never valid at this point. */
11006 gcc_assert (!m->fs.drap_valid);
11007
11008 /* See the comment about red zone and frame
11009 pointer usage in ix86_expand_prologue. */
11010 if (frame_pointer_needed && frame.red_zone_size)
11011 emit_insn (gen_memory_blockage ());
11012
11013 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11014 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11015
11016 /* Determine the CFA offset of the end of the red-zone. */
11017 m->fs.red_zone_offset = 0;
11018 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11019 {
11020 /* The red-zone begins below the return address. */
11021 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11022
11023 /* When the register save area is in the aligned portion of
11024 the stack, determine the maximum runtime displacement that
11025 matches up with the aligned frame. */
11026 if (stack_realign_drap)
11027 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11028 + UNITS_PER_WORD);
11029 }
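  /* For example, on x86_64 in the common (non-DRAP) case this yields
     RED_ZONE_SIZE (128) + UNITS_PER_WORD (8) = 136 bytes below the CFA.  */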
11030
11031 /* Special care must be taken for the normal return case of a function
11032 using eh_return: the eax and edx registers are marked as saved, but
11033 not restored along this path. Adjust the save location to match. */
11034 if (crtl->calls_eh_return && style != 2)
11035 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11036
11037 /* EH_RETURN requires the use of moves to function properly. */
11038 if (crtl->calls_eh_return)
11039 restore_regs_via_mov = true;
11040 /* SEH requires the use of pops to identify the epilogue. */
11041 else if (TARGET_SEH)
11042 restore_regs_via_mov = false;
11043   /* If we're only restoring one register and sp is not valid, then
11044      use a move instruction to restore the register, since it's
11045      less work than reloading sp and popping the register.  */
11046 else if (!m->fs.sp_valid && frame.nregs <= 1)
11047 restore_regs_via_mov = true;
11048 else if (TARGET_EPILOGUE_USING_MOVE
11049 && cfun->machine->use_fast_prologue_epilogue
11050 && (frame.nregs > 1
11051 || m->fs.sp_offset != frame.reg_save_offset))
11052 restore_regs_via_mov = true;
11053 else if (frame_pointer_needed
11054 && !frame.nregs
11055 && m->fs.sp_offset != frame.reg_save_offset)
11056 restore_regs_via_mov = true;
11057 else if (frame_pointer_needed
11058 && TARGET_USE_LEAVE
11059 && cfun->machine->use_fast_prologue_epilogue
11060 && frame.nregs == 1)
11061 restore_regs_via_mov = true;
11062 else
11063 restore_regs_via_mov = false;
11064
11065 if (restore_regs_via_mov || frame.nsseregs)
11066 {
11067 /* Ensure that the entire register save area is addressable via
11068 the stack pointer, if we will restore via sp. */
11069 if (TARGET_64BIT
11070 && m->fs.sp_offset > 0x7fffffff
11071 && !(m->fs.fp_valid || m->fs.drap_valid)
11072 && (frame.nsseregs + frame.nregs) != 0)
11073 {
11074 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11075 GEN_INT (m->fs.sp_offset
11076 - frame.sse_reg_save_offset),
11077 style,
11078 m->fs.cfa_reg == stack_pointer_rtx);
11079 }
11080 }
11081
11082 /* If there are any SSE registers to restore, then we have to do it
11083 via moves, since there's obviously no pop for SSE regs. */
11084 if (frame.nsseregs)
11085 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11086 style == 2);
11087
11088 if (restore_regs_via_mov)
11089 {
11090 rtx t;
11091
11092 if (frame.nregs)
11093 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11094
11095 /* eh_return epilogues need %ecx added to the stack pointer. */
11096 if (style == 2)
11097 {
11098 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11099
11100 /* Stack align doesn't work with eh_return. */
11101 gcc_assert (!stack_realign_drap);
11102 	  /* Neither do regparm nested functions.  */
11103 gcc_assert (!ix86_static_chain_on_stack);
11104
11105 if (frame_pointer_needed)
11106 {
11107 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11108 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
11109 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11110
11111 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11112 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11113
11114 /* Note that we use SA as a temporary CFA, as the return
11115 address is at the proper place relative to it. We
11116 pretend this happens at the FP restore insn because
11117 prior to this insn the FP would be stored at the wrong
11118 offset relative to SA, and after this insn we have no
11119 other reasonable register to use for the CFA. We don't
11120 bother resetting the CFA to the SP for the duration of
11121 the return insn. */
11122 add_reg_note (insn, REG_CFA_DEF_CFA,
11123 plus_constant (sa, UNITS_PER_WORD));
11124 ix86_add_queued_cfa_restore_notes (insn);
11125 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11126 RTX_FRAME_RELATED_P (insn) = 1;
11127
11128 m->fs.cfa_reg = sa;
11129 m->fs.cfa_offset = UNITS_PER_WORD;
11130 m->fs.fp_valid = false;
11131
11132 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11133 const0_rtx, style, false);
11134 }
11135 else
11136 {
11137 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11138 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
11139 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11140 ix86_add_queued_cfa_restore_notes (insn);
11141
11142 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11143 if (m->fs.cfa_offset != UNITS_PER_WORD)
11144 {
11145 m->fs.cfa_offset = UNITS_PER_WORD;
11146 add_reg_note (insn, REG_CFA_DEF_CFA,
11147 plus_constant (stack_pointer_rtx,
11148 UNITS_PER_WORD));
11149 RTX_FRAME_RELATED_P (insn) = 1;
11150 }
11151 }
11152 m->fs.sp_offset = UNITS_PER_WORD;
11153 m->fs.sp_valid = true;
11154 }
11155 }
11156 else
11157 {
11158 /* SEH requires that the function end with (1) a stack adjustment
11159 if necessary, (2) a sequence of pops, and (3) a return or
11160 jump instruction. Prevent insns from the function body from
11161 being scheduled into this sequence. */
11162 if (TARGET_SEH)
11163 {
11164 	  /* Prevent a catch region from being adjacent to the standard
11165 	     epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
11166 	     several other flags that would be interesting to test are
11167 	     set up yet.  */
11168 if (flag_non_call_exceptions)
11169 emit_insn (gen_nops (const1_rtx));
11170 else
11171 emit_insn (gen_blockage ());
11172 }
11173
11174 /* First step is to deallocate the stack frame so that we can
11175 pop the registers. */
11176 if (!m->fs.sp_valid)
11177 {
11178 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11179 GEN_INT (m->fs.fp_offset
11180 - frame.reg_save_offset),
11181 style, false);
11182 }
11183 else if (m->fs.sp_offset != frame.reg_save_offset)
11184 {
11185 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11186 GEN_INT (m->fs.sp_offset
11187 - frame.reg_save_offset),
11188 style,
11189 m->fs.cfa_reg == stack_pointer_rtx);
11190 }
11191
11192 ix86_emit_restore_regs_using_pop ();
11193 }
11194
11195   /* If we used a frame pointer and haven't already got rid of it,
11196      then do so now.  */
11197 if (m->fs.fp_valid)
11198 {
11199 /* If the stack pointer is valid and pointing at the frame
11200 pointer store address, then we only need a pop. */
11201 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11202 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11203 /* Leave results in shorter dependency chains on CPUs that are
11204 able to grok it fast. */
11205 else if (TARGET_USE_LEAVE
11206 || optimize_function_for_size_p (cfun)
11207 || !cfun->machine->use_fast_prologue_epilogue)
11208 ix86_emit_leave ();
11209 else
11210 {
11211 pro_epilogue_adjust_stack (stack_pointer_rtx,
11212 hard_frame_pointer_rtx,
11213 const0_rtx, style, !using_drap);
11214 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11215 }
11216 }
11217
11218 if (using_drap)
11219 {
11220 int param_ptr_offset = UNITS_PER_WORD;
11221 rtx insn;
11222
11223 gcc_assert (stack_realign_drap);
11224
11225 if (ix86_static_chain_on_stack)
11226 param_ptr_offset += UNITS_PER_WORD;
11227 if (!call_used_regs[REGNO (crtl->drap_reg)])
11228 param_ptr_offset += UNITS_PER_WORD;
11229
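      /* E.g. for a 32-bit function whose DRAP register is call-saved and
	 which has no static chain on the stack, param_ptr_offset is 8:
	 one word for the return address and one for the saved DRAP
	 register, so %esp is restored to the DRAP value minus 8 and the
	 saved register is popped just below.  */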
11230 insn = emit_insn (gen_rtx_SET
11231 (VOIDmode, stack_pointer_rtx,
11232 gen_rtx_PLUS (Pmode,
11233 crtl->drap_reg,
11234 GEN_INT (-param_ptr_offset))));
11235 m->fs.cfa_reg = stack_pointer_rtx;
11236 m->fs.cfa_offset = param_ptr_offset;
11237 m->fs.sp_offset = param_ptr_offset;
11238 m->fs.realigned = false;
11239
11240 add_reg_note (insn, REG_CFA_DEF_CFA,
11241 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11242 GEN_INT (param_ptr_offset)));
11243 RTX_FRAME_RELATED_P (insn) = 1;
11244
11245 if (!call_used_regs[REGNO (crtl->drap_reg)])
11246 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11247 }
11248
11249 /* At this point the stack pointer must be valid, and we must have
11250 restored all of the registers. We may not have deallocated the
11251 entire stack frame. We've delayed this until now because it may
11252 be possible to merge the local stack deallocation with the
11253 deallocation forced by ix86_static_chain_on_stack. */
11254 gcc_assert (m->fs.sp_valid);
11255 gcc_assert (!m->fs.fp_valid);
11256 gcc_assert (!m->fs.realigned);
11257 if (m->fs.sp_offset != UNITS_PER_WORD)
11258 {
11259 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11260 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11261 style, true);
11262 }
11263
11264 /* Sibcall epilogues don't want a return instruction. */
11265 if (style == 0)
11266 {
11267 m->fs = frame_state_save;
11268 return;
11269 }
11270
11271 /* Emit vzeroupper if needed. */
11272 if (TARGET_VZEROUPPER
11273 && !TREE_THIS_VOLATILE (cfun->decl)
11274 && !cfun->machine->caller_return_avx256_p)
11275 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
11276
11277 if (crtl->args.pops_args && crtl->args.size)
11278 {
11279 rtx popc = GEN_INT (crtl->args.pops_args);
11280
11281 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11282 address, do explicit add, and jump indirectly to the caller. */
11283
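      /* Illustrative only: the common case below emits a single
	     ret   $pops_args
	 whereas the >= 64K case emits roughly
	     popl  %ecx               # return address -> %ecx
	     addl  $pops_args, %esp   # release the argument area
	     jmp   *%ecx  */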
11284 if (crtl->args.pops_args >= 65536)
11285 {
11286 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11287 rtx insn;
11288
11289 /* There is no "pascal" calling convention in any 64bit ABI. */
11290 gcc_assert (!TARGET_64BIT);
11291
11292 insn = emit_insn (gen_pop (ecx));
11293 m->fs.cfa_offset -= UNITS_PER_WORD;
11294 m->fs.sp_offset -= UNITS_PER_WORD;
11295
11296 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11297 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11298 add_reg_note (insn, REG_CFA_REGISTER,
11299 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11300 RTX_FRAME_RELATED_P (insn) = 1;
11301
11302 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11303 popc, -1, true);
11304 emit_jump_insn (gen_return_indirect_internal (ecx));
11305 }
11306 else
11307 emit_jump_insn (gen_return_pop_internal (popc));
11308 }
11309 else
11310 emit_jump_insn (gen_return_internal ());
11311
11312 /* Restore the state back to the state from the prologue,
11313 so that it's correct for the next epilogue. */
11314 m->fs = frame_state_save;
11315 }
11316
11317 /* Reset from the function's potential modifications. */
11318
11319 static void
11320 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11321 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11322 {
11323 if (pic_offset_table_rtx)
11324 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11325 #if TARGET_MACHO
11326 /* Mach-O doesn't support labels at the end of objects, so if
11327 it looks like we might want one, insert a NOP. */
11328 {
11329 rtx insn = get_last_insn ();
11330 while (insn
11331 && NOTE_P (insn)
11332 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11333 insn = PREV_INSN (insn);
11334 if (insn
11335 && (LABEL_P (insn)
11336 || (NOTE_P (insn)
11337 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11338 fputs ("\tnop\n", file);
11339 }
11340 #endif
11341
11342 }
11343
11344 /* Return a scratch register to use in the split stack prologue. The
11345 split stack prologue is used for -fsplit-stack. It is the first
11346 instructions in the function, even before the regular prologue.
11347 The scratch register can be any caller-saved register which is not
11348 used for parameters or for the static chain. */
11349
11350 static unsigned int
11351 split_stack_prologue_scratch_regno (void)
11352 {
11353 if (TARGET_64BIT)
11354 return R11_REG;
11355 else
11356 {
11357 bool is_fastcall;
11358 int regparm;
11359
11360 is_fastcall = (lookup_attribute ("fastcall",
11361 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11362 != NULL);
11363 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11364
11365 if (is_fastcall)
11366 {
11367 if (DECL_STATIC_CHAIN (cfun->decl))
11368 {
11369 sorry ("-fsplit-stack does not support fastcall with "
11370 "nested function");
11371 return INVALID_REGNUM;
11372 }
11373 return AX_REG;
11374 }
11375 else if (regparm < 3)
11376 {
11377 if (!DECL_STATIC_CHAIN (cfun->decl))
11378 return CX_REG;
11379 else
11380 {
11381 if (regparm >= 2)
11382 {
11383 sorry ("-fsplit-stack does not support 2 register "
11384 " parameters for a nested function");
11385 return INVALID_REGNUM;
11386 }
11387 return DX_REG;
11388 }
11389 }
11390 else
11391 {
11392 /* FIXME: We could make this work by pushing a register
11393 around the addition and comparison. */
11394 sorry ("-fsplit-stack does not support 3 register parameters");
11395 return INVALID_REGNUM;
11396 }
11397 }
11398 }
11399
11400 /* A SYMBOL_REF for the function which allocates new stack space for
11401 -fsplit-stack. */
11402
11403 static GTY(()) rtx split_stack_fn;
11404
11405 /* A SYMBOL_REF for the more stack function when using the large
11406 model. */
11407
11408 static GTY(()) rtx split_stack_fn_large;
11409
11410 /* Handle -fsplit-stack. These are the first instructions in the
11411 function, even before the regular prologue. */
11412
11413 void
11414 ix86_expand_split_stack_prologue (void)
11415 {
11416 struct ix86_frame frame;
11417 HOST_WIDE_INT allocate;
11418 unsigned HOST_WIDE_INT args_size;
11419 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11420 rtx scratch_reg = NULL_RTX;
11421 rtx varargs_label = NULL_RTX;
11422 rtx fn;
11423
11424 gcc_assert (flag_split_stack && reload_completed);
11425
11426 ix86_finalize_stack_realign_flags ();
11427 ix86_compute_frame_layout (&frame);
11428 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11429
11430 /* This is the label we will branch to if we have enough stack
11431 space. We expect the basic block reordering pass to reverse this
11432 branch if optimizing, so that we branch in the unlikely case. */
11433 label = gen_label_rtx ();
11434
11435 /* We need to compare the stack pointer minus the frame size with
11436 the stack boundary in the TCB. The stack boundary always gives
11437 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11438 can compare directly. Otherwise we need to do an addition. */
11439
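  /* A rough, illustrative sketch of the 64-bit code emitted when the frame
     is small enough to compare %rsp directly (the common case):
	   cmpq  %fs:<guard>, %rsp     # stack-guard field of the TCB
	   jae   .Lenough
	   movq  $allocate, %r10
	   movq  $args_size, %r11
	   call  __morestack
	   ret
     .Lenough:
     The exact segment register and offset come from the UNSPEC_STACK_CHECK
     address, so the operands above are illustrative only.  */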
11440 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11441 UNSPEC_STACK_CHECK);
11442 limit = gen_rtx_CONST (Pmode, limit);
11443 limit = gen_rtx_MEM (Pmode, limit);
11444 if (allocate < SPLIT_STACK_AVAILABLE)
11445 current = stack_pointer_rtx;
11446 else
11447 {
11448 unsigned int scratch_regno;
11449 rtx offset;
11450
11451 /* We need a scratch register to hold the stack pointer minus
11452 the required frame size. Since this is the very start of the
11453 function, the scratch register can be any caller-saved
11454 register which is not used for parameters. */
11455 offset = GEN_INT (- allocate);
11456 scratch_regno = split_stack_prologue_scratch_regno ();
11457 if (scratch_regno == INVALID_REGNUM)
11458 return;
11459 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11460 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11461 {
11462 /* We don't use ix86_gen_add3 in this case because it will
11463 want to split to lea, but when not optimizing the insn
11464 will not be split after this point. */
11465 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11466 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11467 offset)));
11468 }
11469 else
11470 {
11471 emit_move_insn (scratch_reg, offset);
11472 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11473 stack_pointer_rtx));
11474 }
11475 current = scratch_reg;
11476 }
11477
11478 ix86_expand_branch (GEU, current, limit, label);
11479 jump_insn = get_last_insn ();
11480 JUMP_LABEL (jump_insn) = label;
11481
11482 /* Mark the jump as very likely to be taken. */
11483 add_reg_note (jump_insn, REG_BR_PROB,
11484 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
11485
11486 if (split_stack_fn == NULL_RTX)
11487 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11488 fn = split_stack_fn;
11489
11490 /* Get more stack space. We pass in the desired stack space and the
11491 size of the arguments to copy to the new stack. In 32-bit mode
11492 we push the parameters; __morestack will return on a new stack
11493 anyhow. In 64-bit mode we pass the parameters in r10 and
11494 r11. */
11495 allocate_rtx = GEN_INT (allocate);
11496 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11497 call_fusage = NULL_RTX;
11498 if (TARGET_64BIT)
11499 {
11500 rtx reg10, reg11;
11501
11502 reg10 = gen_rtx_REG (Pmode, R10_REG);
11503 reg11 = gen_rtx_REG (Pmode, R11_REG);
11504
11505 /* If this function uses a static chain, it will be in %r10.
11506 Preserve it across the call to __morestack. */
11507 if (DECL_STATIC_CHAIN (cfun->decl))
11508 {
11509 rtx rax;
11510
11511 rax = gen_rtx_REG (Pmode, AX_REG);
11512 emit_move_insn (rax, reg10);
11513 use_reg (&call_fusage, rax);
11514 }
11515
11516 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11517 {
11518 HOST_WIDE_INT argval;
11519
11520 /* When using the large model we need to load the address
11521 into a register, and we've run out of registers. So we
11522 switch to a different calling convention, and we call a
11523 different function: __morestack_large. We pass the
11524 argument size in the upper 32 bits of r10 and pass the
11525 frame size in the lower 32 bits. */
11526 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11527 gcc_assert ((args_size & 0xffffffff) == args_size);
11528
11529 if (split_stack_fn_large == NULL_RTX)
11530 split_stack_fn_large =
11531 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11532
11533 if (ix86_cmodel == CM_LARGE_PIC)
11534 {
11535 rtx label, x;
11536
11537 label = gen_label_rtx ();
11538 emit_label (label);
11539 LABEL_PRESERVE_P (label) = 1;
11540 emit_insn (gen_set_rip_rex64 (reg10, label));
11541 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11542 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11543 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11544 UNSPEC_GOT);
11545 x = gen_rtx_CONST (Pmode, x);
11546 emit_move_insn (reg11, x);
11547 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11548 x = gen_const_mem (Pmode, x);
11549 emit_move_insn (reg11, x);
11550 }
11551 else
11552 emit_move_insn (reg11, split_stack_fn_large);
11553
11554 fn = reg11;
11555
11556 argval = ((args_size << 16) << 16) + allocate;
11557 emit_move_insn (reg10, GEN_INT (argval));
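	      /* For example, args_size == 0x20 and allocate == 0x1000 load
		 %r10 with 0x0000002000001000; __morestack_large_model splits
		 the two halves apart again.  */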
11558 }
11559 else
11560 {
11561 emit_move_insn (reg10, allocate_rtx);
11562 emit_move_insn (reg11, GEN_INT (args_size));
11563 use_reg (&call_fusage, reg11);
11564 }
11565
11566 use_reg (&call_fusage, reg10);
11567 }
11568 else
11569 {
11570 emit_insn (gen_push (GEN_INT (args_size)));
11571 emit_insn (gen_push (allocate_rtx));
11572 }
11573 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11574 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11575 NULL_RTX, 0);
11576 add_function_usage_to (call_insn, call_fusage);
11577
11578 /* In order to make call/return prediction work right, we now need
11579 to execute a return instruction. See
11580 libgcc/config/i386/morestack.S for the details on how this works.
11581
11582 For flow purposes gcc must not see this as a return
11583 instruction--we need control flow to continue at the subsequent
11584 label. Therefore, we use an unspec. */
11585 gcc_assert (crtl->args.pops_args < 65536);
11586 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11587
11588 /* If we are in 64-bit mode and this function uses a static chain,
11589      we saved %r10 in %rax before calling __morestack.  */
11590 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11591 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11592 gen_rtx_REG (Pmode, AX_REG));
11593
11594 /* If this function calls va_start, we need to store a pointer to
11595 the arguments on the old stack, because they may not have been
11596 all copied to the new stack. At this point the old stack can be
11597 found at the frame pointer value used by __morestack, because
11598 __morestack has set that up before calling back to us. Here we
11599 store that pointer in a scratch register, and in
11600 ix86_expand_prologue we store the scratch register in a stack
11601 slot. */
11602 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11603 {
11604 unsigned int scratch_regno;
11605 rtx frame_reg;
11606 int words;
11607
11608 scratch_regno = split_stack_prologue_scratch_regno ();
11609 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11610 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11611
11612 /* 64-bit:
11613 fp -> old fp value
11614 return address within this function
11615 return address of caller of this function
11616 stack arguments
11617 So we add three words to get to the stack arguments.
11618
11619 32-bit:
11620 fp -> old fp value
11621 return address within this function
11622 first argument to __morestack
11623 second argument to __morestack
11624 return address of caller of this function
11625 stack arguments
11626 So we add five words to get to the stack arguments.
11627 */
11628 words = TARGET_64BIT ? 3 : 5;
11629 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11630 gen_rtx_PLUS (Pmode, frame_reg,
11631 GEN_INT (words * UNITS_PER_WORD))));
11632
11633 varargs_label = gen_label_rtx ();
11634 emit_jump_insn (gen_jump (varargs_label));
11635 JUMP_LABEL (get_last_insn ()) = varargs_label;
11636
11637 emit_barrier ();
11638 }
11639
11640 emit_label (label);
11641 LABEL_NUSES (label) = 1;
11642
11643 /* If this function calls va_start, we now have to set the scratch
11644 register for the case where we do not call __morestack. In this
11645 case we need to set it based on the stack pointer. */
11646 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11647 {
11648 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11649 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11650 GEN_INT (UNITS_PER_WORD))));
11651
11652 emit_label (varargs_label);
11653 LABEL_NUSES (varargs_label) = 1;
11654 }
11655 }
11656
11657 /* We may have to tell the dataflow pass that the split stack prologue
11658 is initializing a scratch register. */
11659
11660 static void
11661 ix86_live_on_entry (bitmap regs)
11662 {
11663 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11664 {
11665 gcc_assert (flag_split_stack);
11666 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11667 }
11668 }
11669 \f
11670 /* Extract the parts of an RTL expression that is a valid memory address
11671 for an instruction. Return 0 if the structure of the address is
11672 grossly off. Return -1 if the address contains ASHIFT, so it is not
11673    strictly valid, but still useful for computing the length of the lea
11673    instruction.  */
11674
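/* For illustration (names hypothetical): an access to a[i] with the array
   base in %ebx, the index in %ecx scaled by 4, and an 8-byte displacement
   reaches this function as
	(plus (plus (mult (reg %ecx) (const_int 4)) (reg %ebx))
	      (const_int 8))
   and decomposes into base = %ebx, index = %ecx, scale = 4, disp = 8,
   seg = SEG_DEFAULT.  */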
11675 int
11676 ix86_decompose_address (rtx addr, struct ix86_address *out)
11677 {
11678 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11679 rtx base_reg, index_reg;
11680 HOST_WIDE_INT scale = 1;
11681 rtx scale_rtx = NULL_RTX;
11682 rtx tmp;
11683 int retval = 1;
11684 enum ix86_address_seg seg = SEG_DEFAULT;
11685
11686 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11687 base = addr;
11688 else if (GET_CODE (addr) == PLUS)
11689 {
11690 rtx addends[4], op;
11691 int n = 0, i;
11692
11693 op = addr;
11694 do
11695 {
11696 if (n >= 4)
11697 return 0;
11698 addends[n++] = XEXP (op, 1);
11699 op = XEXP (op, 0);
11700 }
11701 while (GET_CODE (op) == PLUS);
11702 if (n >= 4)
11703 return 0;
11704 addends[n] = op;
11705
11706 for (i = n; i >= 0; --i)
11707 {
11708 op = addends[i];
11709 switch (GET_CODE (op))
11710 {
11711 case MULT:
11712 if (index)
11713 return 0;
11714 index = XEXP (op, 0);
11715 scale_rtx = XEXP (op, 1);
11716 break;
11717
11718 case ASHIFT:
11719 if (index)
11720 return 0;
11721 index = XEXP (op, 0);
11722 tmp = XEXP (op, 1);
11723 if (!CONST_INT_P (tmp))
11724 return 0;
11725 scale = INTVAL (tmp);
11726 if ((unsigned HOST_WIDE_INT) scale > 3)
11727 return 0;
11728 scale = 1 << scale;
11729 break;
11730
11731 case UNSPEC:
11732 if (XINT (op, 1) == UNSPEC_TP
11733 && TARGET_TLS_DIRECT_SEG_REFS
11734 && seg == SEG_DEFAULT)
11735 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11736 else
11737 return 0;
11738 break;
11739
11740 case REG:
11741 case SUBREG:
11742 if (!base)
11743 base = op;
11744 else if (!index)
11745 index = op;
11746 else
11747 return 0;
11748 break;
11749
11750 case CONST:
11751 case CONST_INT:
11752 case SYMBOL_REF:
11753 case LABEL_REF:
11754 if (disp)
11755 return 0;
11756 disp = op;
11757 break;
11758
11759 default:
11760 return 0;
11761 }
11762 }
11763 }
11764 else if (GET_CODE (addr) == MULT)
11765 {
11766 index = XEXP (addr, 0); /* index*scale */
11767 scale_rtx = XEXP (addr, 1);
11768 }
11769 else if (GET_CODE (addr) == ASHIFT)
11770 {
11771 /* We're called for lea too, which implements ashift on occasion. */
11772 index = XEXP (addr, 0);
11773 tmp = XEXP (addr, 1);
11774 if (!CONST_INT_P (tmp))
11775 return 0;
11776 scale = INTVAL (tmp);
11777 if ((unsigned HOST_WIDE_INT) scale > 3)
11778 return 0;
11779 scale = 1 << scale;
11780 retval = -1;
11781 }
11782 else
11783 disp = addr; /* displacement */
11784
11785 /* Extract the integral value of scale. */
11786 if (scale_rtx)
11787 {
11788 if (!CONST_INT_P (scale_rtx))
11789 return 0;
11790 scale = INTVAL (scale_rtx);
11791 }
11792
11793 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11794 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11795
11796 /* Avoid useless 0 displacement. */
11797 if (disp == const0_rtx && (base || index))
11798 disp = NULL_RTX;
11799
11800   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
11801 if (base_reg && index_reg && scale == 1
11802 && (index_reg == arg_pointer_rtx
11803 || index_reg == frame_pointer_rtx
11804 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11805 {
11806 rtx tmp;
11807 tmp = base, base = index, index = tmp;
11808 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11809 }
11810
11811 /* Special case: %ebp cannot be encoded as a base without a displacement.
11812 Similarly %r13. */
11813 if (!disp
11814 && base_reg
11815 && (base_reg == hard_frame_pointer_rtx
11816 || base_reg == frame_pointer_rtx
11817 || base_reg == arg_pointer_rtx
11818 || (REG_P (base_reg)
11819 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11820 || REGNO (base_reg) == R13_REG))))
11821 disp = const0_rtx;
11822
11823 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11824 Avoid this by transforming to [%esi+0].
11825 Reload calls address legitimization without cfun defined, so we need
11826 to test cfun for being non-NULL. */
11827 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11828 && base_reg && !index_reg && !disp
11829 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11830 disp = const0_rtx;
11831
11832 /* Special case: encode reg+reg instead of reg*2. */
11833 if (!base && index && scale == 2)
11834 base = index, base_reg = index_reg, scale = 1;
11835
11836 /* Special case: scaling cannot be encoded without base or displacement. */
11837 if (!base && !disp && index && scale != 1)
11838 disp = const0_rtx;
11839
11840 out->base = base;
11841 out->index = index;
11842 out->disp = disp;
11843 out->scale = scale;
11844 out->seg = seg;
11845
11846 return retval;
11847 }
11848 \f
11849 /* Return cost of the memory address x.
11850 For i386, it is better to use a complex address than let gcc copy
11851 the address into a reg and make a new pseudo. But not if the address
11852    requires two regs - that would mean more pseudos with longer
11853 lifetimes. */
11854 static int
11855 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11856 {
11857 struct ix86_address parts;
11858 int cost = 1;
11859 int ok = ix86_decompose_address (x, &parts);
11860
11861 gcc_assert (ok);
11862
11863 if (parts.base && GET_CODE (parts.base) == SUBREG)
11864 parts.base = SUBREG_REG (parts.base);
11865 if (parts.index && GET_CODE (parts.index) == SUBREG)
11866 parts.index = SUBREG_REG (parts.index);
11867
11868 /* Attempt to minimize number of registers in the address. */
11869 if ((parts.base
11870 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11871 || (parts.index
11872 && (!REG_P (parts.index)
11873 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11874 cost++;
11875
11876 if (parts.base
11877 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11878 && parts.index
11879 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11880 && parts.base != parts.index)
11881 cost++;
11882
11883   /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11884      since its predecode logic can't detect the length of instructions
11885      and it degenerates to vector decoding.  Increase the cost of such
11886 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11887 to split such addresses or even refuse such addresses at all.
11888
11889 Following addressing modes are affected:
11890 [base+scale*index]
11891 [scale*index+disp]
11892 [base+index]
11893
11894      The first and last case may be avoidable by explicitly coding the zero in
11895      the memory address, but I don't have an AMD-K6 machine handy to check
11896      this theory.  */
11897
11898 if (TARGET_K6
11899 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11900 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11901 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11902 cost += 10;
11903
11904 return cost;
11905 }
11906 \f
11907 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11908    this is used to form addresses to local data when -fPIC is in
11909    use.  */
11910
11911 static bool
11912 darwin_local_data_pic (rtx disp)
11913 {
11914 return (GET_CODE (disp) == UNSPEC
11915 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11916 }
11917
11918 /* Determine if a given RTX is a valid constant. We already know this
11919 satisfies CONSTANT_P. */
11920
11921 bool
11922 legitimate_constant_p (rtx x)
11923 {
11924 switch (GET_CODE (x))
11925 {
11926 case CONST:
11927 x = XEXP (x, 0);
11928
11929 if (GET_CODE (x) == PLUS)
11930 {
11931 if (!CONST_INT_P (XEXP (x, 1)))
11932 return false;
11933 x = XEXP (x, 0);
11934 }
11935
11936 if (TARGET_MACHO && darwin_local_data_pic (x))
11937 return true;
11938
11939 /* Only some unspecs are valid as "constants". */
11940 if (GET_CODE (x) == UNSPEC)
11941 switch (XINT (x, 1))
11942 {
11943 case UNSPEC_GOT:
11944 case UNSPEC_GOTOFF:
11945 case UNSPEC_PLTOFF:
11946 return TARGET_64BIT;
11947 case UNSPEC_TPOFF:
11948 case UNSPEC_NTPOFF:
11949 x = XVECEXP (x, 0, 0);
11950 return (GET_CODE (x) == SYMBOL_REF
11951 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11952 case UNSPEC_DTPOFF:
11953 x = XVECEXP (x, 0, 0);
11954 return (GET_CODE (x) == SYMBOL_REF
11955 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11956 default:
11957 return false;
11958 }
11959
11960 /* We must have drilled down to a symbol. */
11961 if (GET_CODE (x) == LABEL_REF)
11962 return true;
11963 if (GET_CODE (x) != SYMBOL_REF)
11964 return false;
11965 /* FALLTHRU */
11966
11967 case SYMBOL_REF:
11968 /* TLS symbols are never valid. */
11969 if (SYMBOL_REF_TLS_MODEL (x))
11970 return false;
11971
11972 /* DLLIMPORT symbols are never valid. */
11973 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11974 && SYMBOL_REF_DLLIMPORT_P (x))
11975 return false;
11976
11977 #if TARGET_MACHO
11978 /* mdynamic-no-pic */
11979 if (MACHO_DYNAMIC_NO_PIC_P)
11980 return machopic_symbol_defined_p (x);
11981 #endif
11982 break;
11983
11984 case CONST_DOUBLE:
11985 if (GET_MODE (x) == TImode
11986 && x != CONST0_RTX (TImode)
11987 && !TARGET_64BIT)
11988 return false;
11989 break;
11990
11991 case CONST_VECTOR:
11992 if (!standard_sse_constant_p (x))
11993 return false;
11994
11995 default:
11996 break;
11997 }
11998
11999 /* Otherwise we handle everything else in the move patterns. */
12000 return true;
12001 }
12002
12003 /* Determine if it's legal to put X into the constant pool. This
12004 is not possible for the address of thread-local symbols, which
12005 is checked above. */
12006
12007 static bool
12008 ix86_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12009 {
12010 /* We can always put integral constants and vectors in memory. */
12011 switch (GET_CODE (x))
12012 {
12013 case CONST_INT:
12014 case CONST_DOUBLE:
12015 case CONST_VECTOR:
12016 return false;
12017
12018 default:
12019 break;
12020 }
12021 return !legitimate_constant_p (x);
12022 }
12023
12024
12025 /* Nonzero if the constant value X is a legitimate general operand
12026 when generating PIC code. It is given that flag_pic is on and
12027 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12028
12029 bool
12030 legitimate_pic_operand_p (rtx x)
12031 {
12032 rtx inner;
12033
12034 switch (GET_CODE (x))
12035 {
12036 case CONST:
12037 inner = XEXP (x, 0);
12038 if (GET_CODE (inner) == PLUS
12039 && CONST_INT_P (XEXP (inner, 1)))
12040 inner = XEXP (inner, 0);
12041
12042 /* Only some unspecs are valid as "constants". */
12043 if (GET_CODE (inner) == UNSPEC)
12044 switch (XINT (inner, 1))
12045 {
12046 case UNSPEC_GOT:
12047 case UNSPEC_GOTOFF:
12048 case UNSPEC_PLTOFF:
12049 return TARGET_64BIT;
12050 case UNSPEC_TPOFF:
12051 x = XVECEXP (inner, 0, 0);
12052 return (GET_CODE (x) == SYMBOL_REF
12053 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12054 case UNSPEC_MACHOPIC_OFFSET:
12055 return legitimate_pic_address_disp_p (x);
12056 default:
12057 return false;
12058 }
12059 /* FALLTHRU */
12060
12061 case SYMBOL_REF:
12062 case LABEL_REF:
12063 return legitimate_pic_address_disp_p (x);
12064
12065 default:
12066 return true;
12067 }
12068 }
12069
12070 /* Determine if a given CONST RTX is a valid memory displacement
12071 in PIC mode. */
12072
12073 bool
12074 legitimate_pic_address_disp_p (rtx disp)
12075 {
12076 bool saw_plus;
12077
12078 /* In 64bit mode we can allow direct addresses of symbols and labels
12079 when they are not dynamic symbols. */
12080 if (TARGET_64BIT)
12081 {
12082 rtx op0 = disp, op1;
12083
12084 switch (GET_CODE (disp))
12085 {
12086 case LABEL_REF:
12087 return true;
12088
12089 case CONST:
12090 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12091 break;
12092 op0 = XEXP (XEXP (disp, 0), 0);
12093 op1 = XEXP (XEXP (disp, 0), 1);
12094 if (!CONST_INT_P (op1)
12095 || INTVAL (op1) >= 16*1024*1024
12096 || INTVAL (op1) < -16*1024*1024)
12097 break;
12098 if (GET_CODE (op0) == LABEL_REF)
12099 return true;
12100 if (GET_CODE (op0) != SYMBOL_REF)
12101 break;
12102 /* FALLTHRU */
12103
12104 case SYMBOL_REF:
12105 /* TLS references should always be enclosed in UNSPEC. */
12106 if (SYMBOL_REF_TLS_MODEL (op0))
12107 return false;
12108 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
12109 && ix86_cmodel != CM_LARGE_PIC)
12110 return true;
12111 break;
12112
12113 default:
12114 break;
12115 }
12116 }
12117 if (GET_CODE (disp) != CONST)
12118 return false;
12119 disp = XEXP (disp, 0);
12120
12121 if (TARGET_64BIT)
12122 {
12123       /* It is unsafe to allow PLUS expressions.  This limits the allowed
12124 	 distance of GOT tables.  We should not need these anyway.  */
12125 if (GET_CODE (disp) != UNSPEC
12126 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12127 && XINT (disp, 1) != UNSPEC_GOTOFF
12128 && XINT (disp, 1) != UNSPEC_PCREL
12129 && XINT (disp, 1) != UNSPEC_PLTOFF))
12130 return false;
12131
12132 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12133 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12134 return false;
12135 return true;
12136 }
12137
12138 saw_plus = false;
12139 if (GET_CODE (disp) == PLUS)
12140 {
12141 if (!CONST_INT_P (XEXP (disp, 1)))
12142 return false;
12143 disp = XEXP (disp, 0);
12144 saw_plus = true;
12145 }
12146
12147 if (TARGET_MACHO && darwin_local_data_pic (disp))
12148 return true;
12149
12150 if (GET_CODE (disp) != UNSPEC)
12151 return false;
12152
12153 switch (XINT (disp, 1))
12154 {
12155 case UNSPEC_GOT:
12156 if (saw_plus)
12157 return false;
12158 /* We need to check for both symbols and labels because VxWorks loads
12159 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12160 details. */
12161 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12162 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12163 case UNSPEC_GOTOFF:
12164     /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12165        While the ABI also specifies a 32bit relocation, we don't produce
12166        it in the small PIC model at all.  */
12167 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12168 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12169 && !TARGET_64BIT)
12170 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12171 return false;
12172 case UNSPEC_GOTTPOFF:
12173 case UNSPEC_GOTNTPOFF:
12174 case UNSPEC_INDNTPOFF:
12175 if (saw_plus)
12176 return false;
12177 disp = XVECEXP (disp, 0, 0);
12178 return (GET_CODE (disp) == SYMBOL_REF
12179 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12180 case UNSPEC_NTPOFF:
12181 disp = XVECEXP (disp, 0, 0);
12182 return (GET_CODE (disp) == SYMBOL_REF
12183 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12184 case UNSPEC_DTPOFF:
12185 disp = XVECEXP (disp, 0, 0);
12186 return (GET_CODE (disp) == SYMBOL_REF
12187 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12188 }
12189
12190 return false;
12191 }
12192
12193 /* Recognizes RTL expressions that are valid memory addresses for an
12194 instruction. The MODE argument is the machine mode for the MEM
12195 expression that wants to use this address.
12196
12197    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
12198 convert common non-canonical forms to canonical form so that they will
12199 be recognized. */
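/* Illustrative note (not part of the original source): a canonical x86
   address decomposes as base + index*scale + disp, e.g.

     (plus:SI (reg:SI 3 bx) (const_int 8))                 ; base + disp
     (plus:SI (mult:SI (reg:SI 2 cx) (const_int 4))
              (reg:SI 3 bx))                               ; index*4 + base

   A non-canonical form such as (ashift (reg) (const_int 2)) is expected
   to be rewritten into a MULT by ix86_legitimize_address first.  */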
12200
12201 static bool
12202 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12203 rtx addr, bool strict)
12204 {
12205 struct ix86_address parts;
12206 rtx base, index, disp;
12207 HOST_WIDE_INT scale;
12208
12209 if (ix86_decompose_address (addr, &parts) <= 0)
12210 /* Decomposition failed. */
12211 return false;
12212
12213 base = parts.base;
12214 index = parts.index;
12215 disp = parts.disp;
12216 scale = parts.scale;
12217
12218 /* Validate base register.
12219
12220 Don't allow SUBREGs that span more than a word here. It can lead to spill
12221 failures when the base is one word out of a two-word structure, which is
12222 represented internally as a DImode int.
12223
12224 if (base)
12225 {
12226 rtx reg;
12227
12228 if (REG_P (base))
12229 reg = base;
12230 else if (GET_CODE (base) == SUBREG
12231 && REG_P (SUBREG_REG (base))
12232 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
12233 <= UNITS_PER_WORD)
12234 reg = SUBREG_REG (base);
12235 else
12236 /* Base is not a register. */
12237 return false;
12238
12239 if (GET_MODE (base) != Pmode)
12240 /* Base is not in Pmode. */
12241 return false;
12242
12243 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12244 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12245 /* Base is not valid. */
12246 return false;
12247 }
12248
12249 /* Validate index register.
12250
12251 Don't allow SUBREG's that span more than a word here -- same as above. */
12252
12253 if (index)
12254 {
12255 rtx reg;
12256
12257 if (REG_P (index))
12258 reg = index;
12259 else if (GET_CODE (index) == SUBREG
12260 && REG_P (SUBREG_REG (index))
12261 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
12262 <= UNITS_PER_WORD)
12263 reg = SUBREG_REG (index);
12264 else
12265 /* Index is not a register. */
12266 return false;
12267
12268 if (GET_MODE (index) != Pmode)
12269 /* Index is not in Pmode. */
12270 return false;
12271
12272 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12273 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12274 /* Index is not valid. */
12275 return false;
12276 }
12277
12278 /* Validate scale factor. */
12279 if (scale != 1)
12280 {
12281 if (!index)
12282 /* Scale without index. */
12283 return false;
12284
12285 if (scale != 2 && scale != 4 && scale != 8)
12286 /* Scale is not a valid multiplier. */
12287 return false;
12288 }
12289
12290 /* Validate displacement. */
12291 if (disp)
12292 {
12293 if (GET_CODE (disp) == CONST
12294 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12295 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12296 switch (XINT (XEXP (disp, 0), 1))
12297 {
12298 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
12299 used. While the ABI also specifies 32bit relocations, we don't produce
12300 them at all and use IP-relative addressing instead. */
12301 case UNSPEC_GOT:
12302 case UNSPEC_GOTOFF:
12303 gcc_assert (flag_pic);
12304 if (!TARGET_64BIT)
12305 goto is_legitimate_pic;
12306
12307 /* 64bit address unspec. */
12308 return false;
12309
12310 case UNSPEC_GOTPCREL:
12311 case UNSPEC_PCREL:
12312 gcc_assert (flag_pic);
12313 goto is_legitimate_pic;
12314
12315 case UNSPEC_GOTTPOFF:
12316 case UNSPEC_GOTNTPOFF:
12317 case UNSPEC_INDNTPOFF:
12318 case UNSPEC_NTPOFF:
12319 case UNSPEC_DTPOFF:
12320 break;
12321
12322 case UNSPEC_STACK_CHECK:
12323 gcc_assert (flag_split_stack);
12324 break;
12325
12326 default:
12327 /* Invalid address unspec. */
12328 return false;
12329 }
12330
12331 else if (SYMBOLIC_CONST (disp)
12332 && (flag_pic
12333 || (TARGET_MACHO
12334 #if TARGET_MACHO
12335 && MACHOPIC_INDIRECT
12336 && !machopic_operand_p (disp)
12337 #endif
12338 )))
12339 {
12340
12341 is_legitimate_pic:
12342 if (TARGET_64BIT && (index || base))
12343 {
12344 /* foo@dtpoff(%rX) is ok. */
12345 if (GET_CODE (disp) != CONST
12346 || GET_CODE (XEXP (disp, 0)) != PLUS
12347 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12348 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12349 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12350 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12351 /* Non-constant pic memory reference. */
12352 return false;
12353 }
12354 else if ((!TARGET_MACHO || flag_pic)
12355 && ! legitimate_pic_address_disp_p (disp))
12356 /* Displacement is an invalid pic construct. */
12357 return false;
12358 #if TARGET_MACHO
12359 else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp))
12360 /* Displacement must be referenced via a non-lazy pointer. */
12361 return false;
12362 #endif
12363
12364 /* This code used to verify that a symbolic pic displacement
12365 includes the pic_offset_table_rtx register.
12366
12367 While this is a good idea, unfortunately these constructs may
12368 be created by the "adds using lea" optimization for incorrect
12369 code like:
12370
12371 int a;
12372 int foo(int i)
12373 {
12374 return *(&a+i);
12375 }
12376
12377 This code is nonsensical, but results in addressing the
12378 GOT table with the pic_offset_table_rtx base. We can't
12379 easily refuse it, since it gets matched by the
12380 "addsi3" pattern, which later gets split to an lea when
12381 the output register differs from the input. While this
12382 could be handled by a separate addsi pattern for this case
12383 that never results in an lea, disabling this test seems to
12384 be the easier and correct fix for the crash. */
12385 }
12386 else if (GET_CODE (disp) != LABEL_REF
12387 && !CONST_INT_P (disp)
12388 && (GET_CODE (disp) != CONST
12389 || !legitimate_constant_p (disp))
12390 && (GET_CODE (disp) != SYMBOL_REF
12391 || !legitimate_constant_p (disp)))
12392 /* Displacement is not constant. */
12393 return false;
12394 else if (TARGET_64BIT
12395 && !x86_64_immediate_operand (disp, VOIDmode))
12396 /* Displacement is out of range. */
12397 return false;
12398 }
12399
12400 /* Everything looks valid. */
12401 return true;
12402 }
12403
12404 /* Determine if a given RTX is a valid constant address. */
12405
12406 bool
12407 constant_address_p (rtx x)
12408 {
12409 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12410 }
12411 \f
12412 /* Return a unique alias set for the GOT. */
12413
12414 static alias_set_type
12415 ix86_GOT_alias_set (void)
12416 {
12417 static alias_set_type set = -1;
12418 if (set == -1)
12419 set = new_alias_set ();
12420 return set;
12421 }
12422
12423 /* Return a legitimate reference for ORIG (an address) using the
12424 register REG. If REG is 0, a new pseudo is generated.
12425
12426 There are two types of references that must be handled:
12427
12428 1. Global data references must load the address from the GOT, via
12429 the PIC reg. An insn is emitted to do this load, and the reg is
12430 returned.
12431
12432 2. Static data references, constant pool addresses, and code labels
12433 compute the address as an offset from the GOT, whose base is in
12434 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12435 differentiate them from global data objects. The returned
12436 address is the PIC reg + an unspec constant.
12437
12438 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12439 reg also appears in the address. */
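/* Illustrative note (not from the original source): for 32-bit PIC the
   two cases above typically produce

     (mem (plus (reg pic) (const (unspec [sym] UNSPEC_GOT))))     ; global
     (plus (reg pic) (const (unspec [sym] UNSPEC_GOTOFF)))        ; local

   i.e. a GOT load for global data and a PIC-register-relative offset for
   local data, matching the @GOT / @GOTOFF relocations emitted later by
   output_pic_addr_const.  */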
12440
12441 static rtx
12442 legitimize_pic_address (rtx orig, rtx reg)
12443 {
12444 rtx addr = orig;
12445 rtx new_rtx = orig;
12446 rtx base;
12447
12448 #if TARGET_MACHO
12449 if (TARGET_MACHO && !TARGET_64BIT)
12450 {
12451 if (reg == 0)
12452 reg = gen_reg_rtx (Pmode);
12453 /* Use the generic Mach-O PIC machinery. */
12454 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12455 }
12456 #endif
12457
12458 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12459 new_rtx = addr;
12460 else if (TARGET_64BIT
12461 && ix86_cmodel != CM_SMALL_PIC
12462 && gotoff_operand (addr, Pmode))
12463 {
12464 rtx tmpreg;
12465 /* This symbol may be referenced via a displacement from the PIC
12466 base address (@GOTOFF). */
12467
12468 if (reload_in_progress)
12469 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12470 if (GET_CODE (addr) == CONST)
12471 addr = XEXP (addr, 0);
12472 if (GET_CODE (addr) == PLUS)
12473 {
12474 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12475 UNSPEC_GOTOFF);
12476 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12477 }
12478 else
12479 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12480 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12481 if (!reg)
12482 tmpreg = gen_reg_rtx (Pmode);
12483 else
12484 tmpreg = reg;
12485 emit_move_insn (tmpreg, new_rtx);
12486
12487 if (reg != 0)
12488 {
12489 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12490 tmpreg, 1, OPTAB_DIRECT);
12491 new_rtx = reg;
12492 }
12493 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12494 }
12495 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12496 {
12497 /* This symbol may be referenced via a displacement from the PIC
12498 base address (@GOTOFF). */
12499
12500 if (reload_in_progress)
12501 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12502 if (GET_CODE (addr) == CONST)
12503 addr = XEXP (addr, 0);
12504 if (GET_CODE (addr) == PLUS)
12505 {
12506 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12507 UNSPEC_GOTOFF);
12508 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12509 }
12510 else
12511 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12512 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12513 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12514
12515 if (reg != 0)
12516 {
12517 emit_move_insn (reg, new_rtx);
12518 new_rtx = reg;
12519 }
12520 }
12521 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12522 /* We can't use @GOTOFF for text labels on VxWorks;
12523 see gotoff_operand. */
12524 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12525 {
12526 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12527 {
12528 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12529 return legitimize_dllimport_symbol (addr, true);
12530 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12531 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12532 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12533 {
12534 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12535 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12536 }
12537 }
12538
12539 /* For x64 PE-COFF there is no GOT table, so we use the address
12540 directly. */
12541 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12542 {
12543 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12544 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12545
12546 if (reg == 0)
12547 reg = gen_reg_rtx (Pmode);
12548 emit_move_insn (reg, new_rtx);
12549 new_rtx = reg;
12550 }
12551 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12552 {
12553 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12554 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12555 new_rtx = gen_const_mem (Pmode, new_rtx);
12556 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12557
12558 if (reg == 0)
12559 reg = gen_reg_rtx (Pmode);
12560 /* Use gen_movsi directly, otherwise the address is loaded
12561 into a register for CSE. We don't want to CSE these addresses;
12562 instead we CSE addresses from the GOT table, so skip this. */
12563 emit_insn (gen_movsi (reg, new_rtx));
12564 new_rtx = reg;
12565 }
12566 else
12567 {
12568 /* This symbol must be referenced via a load from the
12569 Global Offset Table (@GOT). */
12570
12571 if (reload_in_progress)
12572 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12573 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12574 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12575 if (TARGET_64BIT)
12576 new_rtx = force_reg (Pmode, new_rtx);
12577 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12578 new_rtx = gen_const_mem (Pmode, new_rtx);
12579 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12580
12581 if (reg == 0)
12582 reg = gen_reg_rtx (Pmode);
12583 emit_move_insn (reg, new_rtx);
12584 new_rtx = reg;
12585 }
12586 }
12587 else
12588 {
12589 if (CONST_INT_P (addr)
12590 && !x86_64_immediate_operand (addr, VOIDmode))
12591 {
12592 if (reg)
12593 {
12594 emit_move_insn (reg, addr);
12595 new_rtx = reg;
12596 }
12597 else
12598 new_rtx = force_reg (Pmode, addr);
12599 }
12600 else if (GET_CODE (addr) == CONST)
12601 {
12602 addr = XEXP (addr, 0);
12603
12604 /* We must match stuff we generated earlier. Assume the only
12605 unspecs that can get here are ours. Not that we could do
12606 anything with them anyway.... */
12607 if (GET_CODE (addr) == UNSPEC
12608 || (GET_CODE (addr) == PLUS
12609 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12610 return orig;
12611 gcc_assert (GET_CODE (addr) == PLUS);
12612 }
12613 if (GET_CODE (addr) == PLUS)
12614 {
12615 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12616
12617 /* Check first to see if this is a constant offset from a @GOTOFF
12618 symbol reference. */
12619 if (gotoff_operand (op0, Pmode)
12620 && CONST_INT_P (op1))
12621 {
12622 if (!TARGET_64BIT)
12623 {
12624 if (reload_in_progress)
12625 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12626 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12627 UNSPEC_GOTOFF);
12628 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12629 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12630 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12631
12632 if (reg != 0)
12633 {
12634 emit_move_insn (reg, new_rtx);
12635 new_rtx = reg;
12636 }
12637 }
12638 else
12639 {
12640 if (INTVAL (op1) < -16*1024*1024
12641 || INTVAL (op1) >= 16*1024*1024)
12642 {
12643 if (!x86_64_immediate_operand (op1, Pmode))
12644 op1 = force_reg (Pmode, op1);
12645 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12646 }
12647 }
12648 }
12649 else
12650 {
12651 base = legitimize_pic_address (XEXP (addr, 0), reg);
12652 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12653 base == reg ? NULL_RTX : reg);
12654
12655 if (CONST_INT_P (new_rtx))
12656 new_rtx = plus_constant (base, INTVAL (new_rtx));
12657 else
12658 {
12659 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12660 {
12661 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12662 new_rtx = XEXP (new_rtx, 1);
12663 }
12664 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12665 }
12666 }
12667 }
12668 }
12669 return new_rtx;
12670 }
12671 \f
12672 /* Load the thread pointer. If TO_REG is true, force it into a register. */
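/* Illustrative note (not from the original source): the UNSPEC_TP value
   built below is later matched by the move patterns that read the thread
   pointer from the TLS segment register, conventionally %fs on 64-bit
   and %gs on 32-bit Linux targets.  */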
12673
12674 static rtx
12675 get_thread_pointer (int to_reg)
12676 {
12677 rtx tp, reg, insn;
12678
12679 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12680 if (!to_reg)
12681 return tp;
12682
12683 reg = gen_reg_rtx (Pmode);
12684 insn = gen_rtx_SET (VOIDmode, reg, tp);
12685 insn = emit_insn (insn);
12686
12687 return reg;
12688 }
12689
12690 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12691 false if we expect this to be used for a memory address and true if
12692 we expect to load the address into a register. */
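/* Illustrative summary (not from the original source) of what the cases
   below expand to:

     GLOBAL_DYNAMIC  -> a __tls_get_addr call via the
                        tls_global_dynamic_{32,64} patterns
     LOCAL_DYNAMIC   -> one __tls_get_addr call for the module base,
                        plus a @dtpoff offset per symbol
     INITIAL_EXEC    -> thread pointer + a @gottpoff/@indntpoff GOT slot
     LOCAL_EXEC      -> thread pointer + a constant @tpoff/@ntpoff offset  */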
12693
12694 static rtx
12695 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
12696 {
12697 rtx dest, base, off, pic, tp;
12698 int type;
12699
12700 switch (model)
12701 {
12702 case TLS_MODEL_GLOBAL_DYNAMIC:
12703 dest = gen_reg_rtx (Pmode);
12704 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12705
12706 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12707 {
12708 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12709
12710 start_sequence ();
12711 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
12712 insns = get_insns ();
12713 end_sequence ();
12714
12715 RTL_CONST_CALL_P (insns) = 1;
12716 emit_libcall_block (insns, dest, rax, x);
12717 }
12718 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12719 emit_insn (gen_tls_global_dynamic_64 (dest, x));
12720 else
12721 emit_insn (gen_tls_global_dynamic_32 (dest, x));
12722
12723 if (TARGET_GNU2_TLS)
12724 {
12725 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12726
12727 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12728 }
12729 break;
12730
12731 case TLS_MODEL_LOCAL_DYNAMIC:
12732 base = gen_reg_rtx (Pmode);
12733 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12734
12735 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12736 {
12737 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
12738
12739 start_sequence ();
12740 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
12741 insns = get_insns ();
12742 end_sequence ();
12743
12744 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
12745 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
12746 RTL_CONST_CALL_P (insns) = 1;
12747 emit_libcall_block (insns, base, rax, note);
12748 }
12749 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12750 emit_insn (gen_tls_local_dynamic_base_64 (base));
12751 else
12752 emit_insn (gen_tls_local_dynamic_base_32 (base));
12753
12754 if (TARGET_GNU2_TLS)
12755 {
12756 rtx x = ix86_tls_module_base ();
12757
12758 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12759 gen_rtx_MINUS (Pmode, x, tp));
12760 }
12761
12762 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12763 off = gen_rtx_CONST (Pmode, off);
12764
12765 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12766
12767 if (TARGET_GNU2_TLS)
12768 {
12769 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12770
12771 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12772 }
12773
12774 break;
12775
12776 case TLS_MODEL_INITIAL_EXEC:
12777 if (TARGET_64BIT)
12778 {
12779 if (TARGET_SUN_TLS)
12780 {
12781 /* The Sun linker took the AMD64 TLS spec literally
12782 and can only handle %rax as destination of the
12783 initial executable code sequence. */
12784
12785 dest = gen_reg_rtx (Pmode);
12786 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12787 return dest;
12788 }
12789
12790 pic = NULL;
12791 type = UNSPEC_GOTNTPOFF;
12792 }
12793 else if (flag_pic)
12794 {
12795 if (reload_in_progress)
12796 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12797 pic = pic_offset_table_rtx;
12798 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12799 }
12800 else if (!TARGET_ANY_GNU_TLS)
12801 {
12802 pic = gen_reg_rtx (Pmode);
12803 emit_insn (gen_set_got (pic));
12804 type = UNSPEC_GOTTPOFF;
12805 }
12806 else
12807 {
12808 pic = NULL;
12809 type = UNSPEC_INDNTPOFF;
12810 }
12811
12812 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12813 off = gen_rtx_CONST (Pmode, off);
12814 if (pic)
12815 off = gen_rtx_PLUS (Pmode, pic, off);
12816 off = gen_const_mem (Pmode, off);
12817 set_mem_alias_set (off, ix86_GOT_alias_set ());
12818
12819 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12820 {
12821 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12822 off = force_reg (Pmode, off);
12823 return gen_rtx_PLUS (Pmode, base, off);
12824 }
12825 else
12826 {
12827 base = get_thread_pointer (true);
12828 dest = gen_reg_rtx (Pmode);
12829 emit_insn (gen_subsi3 (dest, base, off));
12830 }
12831 break;
12832
12833 case TLS_MODEL_LOCAL_EXEC:
12834 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12835 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12836 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12837 off = gen_rtx_CONST (Pmode, off);
12838
12839 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12840 {
12841 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12842 return gen_rtx_PLUS (Pmode, base, off);
12843 }
12844 else
12845 {
12846 base = get_thread_pointer (true);
12847 dest = gen_reg_rtx (Pmode);
12848 emit_insn (gen_subsi3 (dest, base, off));
12849 }
12850 break;
12851
12852 default:
12853 gcc_unreachable ();
12854 }
12855
12856 return dest;
12857 }
12858
12859 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12860 to symbol DECL. */
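/* Illustrative note (not from the original source): for a dllimport
   symbol "foo" this builds a VAR_DECL whose RTL is a load through the
   import-table entry, roughly (mem (symbol_ref "*__imp__foo")), or
   "*__imp_foo" when the fastcall prefix applies or there is no user
   label prefix.  */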
12861
12862 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12863 htab_t dllimport_map;
12864
12865 static tree
12866 get_dllimport_decl (tree decl)
12867 {
12868 struct tree_map *h, in;
12869 void **loc;
12870 const char *name;
12871 const char *prefix;
12872 size_t namelen, prefixlen;
12873 char *imp_name;
12874 tree to;
12875 rtx rtl;
12876
12877 if (!dllimport_map)
12878 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12879
12880 in.hash = htab_hash_pointer (decl);
12881 in.base.from = decl;
12882 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12883 h = (struct tree_map *) *loc;
12884 if (h)
12885 return h->to;
12886
12887 *loc = h = ggc_alloc_tree_map ();
12888 h->hash = in.hash;
12889 h->base.from = decl;
12890 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12891 VAR_DECL, NULL, ptr_type_node);
12892 DECL_ARTIFICIAL (to) = 1;
12893 DECL_IGNORED_P (to) = 1;
12894 DECL_EXTERNAL (to) = 1;
12895 TREE_READONLY (to) = 1;
12896
12897 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12898 name = targetm.strip_name_encoding (name);
12899 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12900 ? "*__imp_" : "*__imp__";
12901 namelen = strlen (name);
12902 prefixlen = strlen (prefix);
12903 imp_name = (char *) alloca (namelen + prefixlen + 1);
12904 memcpy (imp_name, prefix, prefixlen);
12905 memcpy (imp_name + prefixlen, name, namelen + 1);
12906
12907 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12908 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12909 SET_SYMBOL_REF_DECL (rtl, to);
12910 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12911
12912 rtl = gen_const_mem (Pmode, rtl);
12913 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12914
12915 SET_DECL_RTL (to, rtl);
12916 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12917
12918 return to;
12919 }
12920
12921 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12922 true if we require the result be a register. */
12923
12924 static rtx
12925 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12926 {
12927 tree imp_decl;
12928 rtx x;
12929
12930 gcc_assert (SYMBOL_REF_DECL (symbol));
12931 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12932
12933 x = DECL_RTL (imp_decl);
12934 if (want_reg)
12935 x = force_reg (Pmode, x);
12936 return x;
12937 }
12938
12939 /* Try machine-dependent ways of modifying an illegitimate address
12940 to be legitimate. If we find one, return the new, valid address.
12941 This macro is used in only one place: `memory_address' in explow.c.
12942
12943 OLDX is the address as it was before break_out_memory_refs was called.
12944 In some cases it is useful to look at this to decide what needs to be done.
12945
12946 It is always safe for this macro to do nothing. It exists to recognize
12947 opportunities to optimize the output.
12948
12949 For the 80386, we handle X+REG by loading X into a register R and
12950 using R+REG. R will go in a general reg and indexing will be used.
12951 However, if REG is a broken-out memory address or multiplication,
12952 nothing needs to be done because REG can certainly go in a general reg.
12953
12954 When -fpic is used, special handling is needed for symbolic references.
12955 See comments by legitimize_pic_address in i386.c for details. */
12956
12957 static rtx
12958 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12959 enum machine_mode mode)
12960 {
12961 int changed = 0;
12962 unsigned log;
12963
12964 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12965 if (log)
12966 return legitimize_tls_address (x, (enum tls_model) log, false);
12967 if (GET_CODE (x) == CONST
12968 && GET_CODE (XEXP (x, 0)) == PLUS
12969 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12970 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12971 {
12972 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12973 (enum tls_model) log, false);
12974 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12975 }
12976
12977 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12978 {
12979 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12980 return legitimize_dllimport_symbol (x, true);
12981 if (GET_CODE (x) == CONST
12982 && GET_CODE (XEXP (x, 0)) == PLUS
12983 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12984 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12985 {
12986 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12987 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12988 }
12989 }
12990
12991 if (flag_pic && SYMBOLIC_CONST (x))
12992 return legitimize_pic_address (x, 0);
12993
12994 #if TARGET_MACHO
12995 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12996 return machopic_indirect_data_reference (x, 0);
12997 #endif
12998
12999 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13000 if (GET_CODE (x) == ASHIFT
13001 && CONST_INT_P (XEXP (x, 1))
13002 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13003 {
13004 changed = 1;
13005 log = INTVAL (XEXP (x, 1));
13006 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13007 GEN_INT (1 << log));
13008 }
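  /* Illustrative note (not from the original source): e.g. an address of
     (ashift (reg) (const_int 3)) becomes (mult (reg) (const_int 8)),
     which matches the scaled-index form accepted by
     ix86_legitimate_address_p.  */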
13009
13010 if (GET_CODE (x) == PLUS)
13011 {
13012 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13013
13014 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13015 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13016 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13017 {
13018 changed = 1;
13019 log = INTVAL (XEXP (XEXP (x, 0), 1));
13020 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13021 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13022 GEN_INT (1 << log));
13023 }
13024
13025 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13026 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13027 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13028 {
13029 changed = 1;
13030 log = INTVAL (XEXP (XEXP (x, 1), 1));
13031 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13032 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13033 GEN_INT (1 << log));
13034 }
13035
13036 /* Put multiply first if it isn't already. */
13037 if (GET_CODE (XEXP (x, 1)) == MULT)
13038 {
13039 rtx tmp = XEXP (x, 0);
13040 XEXP (x, 0) = XEXP (x, 1);
13041 XEXP (x, 1) = tmp;
13042 changed = 1;
13043 }
13044
13045 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13046 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13047 created by virtual register instantiation, register elimination, and
13048 similar optimizations. */
13049 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13050 {
13051 changed = 1;
13052 x = gen_rtx_PLUS (Pmode,
13053 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13054 XEXP (XEXP (x, 1), 0)),
13055 XEXP (XEXP (x, 1), 1));
13056 }
13057
13058 /* Canonicalize
13059 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13060 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13061 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13062 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13063 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13064 && CONSTANT_P (XEXP (x, 1)))
13065 {
13066 rtx constant;
13067 rtx other = NULL_RTX;
13068
13069 if (CONST_INT_P (XEXP (x, 1)))
13070 {
13071 constant = XEXP (x, 1);
13072 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13073 }
13074 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13075 {
13076 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13077 other = XEXP (x, 1);
13078 }
13079 else
13080 constant = 0;
13081
13082 if (constant)
13083 {
13084 changed = 1;
13085 x = gen_rtx_PLUS (Pmode,
13086 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13087 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13088 plus_constant (other, INTVAL (constant)));
13089 }
13090 }
13091
13092 if (changed && ix86_legitimate_address_p (mode, x, false))
13093 return x;
13094
13095 if (GET_CODE (XEXP (x, 0)) == MULT)
13096 {
13097 changed = 1;
13098 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13099 }
13100
13101 if (GET_CODE (XEXP (x, 1)) == MULT)
13102 {
13103 changed = 1;
13104 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13105 }
13106
13107 if (changed
13108 && REG_P (XEXP (x, 1))
13109 && REG_P (XEXP (x, 0)))
13110 return x;
13111
13112 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13113 {
13114 changed = 1;
13115 x = legitimize_pic_address (x, 0);
13116 }
13117
13118 if (changed && ix86_legitimate_address_p (mode, x, false))
13119 return x;
13120
13121 if (REG_P (XEXP (x, 0)))
13122 {
13123 rtx temp = gen_reg_rtx (Pmode);
13124 rtx val = force_operand (XEXP (x, 1), temp);
13125 if (val != temp)
13126 emit_move_insn (temp, val);
13127
13128 XEXP (x, 1) = temp;
13129 return x;
13130 }
13131
13132 else if (REG_P (XEXP (x, 1)))
13133 {
13134 rtx temp = gen_reg_rtx (Pmode);
13135 rtx val = force_operand (XEXP (x, 0), temp);
13136 if (val != temp)
13137 emit_move_insn (temp, val);
13138
13139 XEXP (x, 0) = temp;
13140 return x;
13141 }
13142 }
13143
13144 return x;
13145 }
13146 \f
13147 /* Print an integer constant expression in assembler syntax. Addition
13148 and subtraction are the only arithmetic that may appear in these
13149 expressions. FILE is the stdio stream to write to, X is the rtx, and
13150 CODE is the operand print code from the output string. */
13151
13152 static void
13153 output_pic_addr_const (FILE *file, rtx x, int code)
13154 {
13155 char buf[256];
13156
13157 switch (GET_CODE (x))
13158 {
13159 case PC:
13160 gcc_assert (flag_pic);
13161 putc ('.', file);
13162 break;
13163
13164 case SYMBOL_REF:
13165 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13166 output_addr_const (file, x);
13167 else
13168 {
13169 const char *name = XSTR (x, 0);
13170
13171 /* Mark the decl as referenced so that cgraph will
13172 output the function. */
13173 if (SYMBOL_REF_DECL (x))
13174 mark_decl_referenced (SYMBOL_REF_DECL (x));
13175
13176 #if TARGET_MACHO
13177 if (MACHOPIC_INDIRECT
13178 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13179 name = machopic_indirection_name (x, /*stub_p=*/true);
13180 #endif
13181 assemble_name (file, name);
13182 }
13183 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13184 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13185 fputs ("@PLT", file);
13186 break;
13187
13188 case LABEL_REF:
13189 x = XEXP (x, 0);
13190 /* FALLTHRU */
13191 case CODE_LABEL:
13192 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13193 assemble_name (asm_out_file, buf);
13194 break;
13195
13196 case CONST_INT:
13197 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13198 break;
13199
13200 case CONST:
13201 /* This used to output parentheses around the expression,
13202 but that does not work on the 386 (either ATT or BSD assembler). */
13203 output_pic_addr_const (file, XEXP (x, 0), code);
13204 break;
13205
13206 case CONST_DOUBLE:
13207 if (GET_MODE (x) == VOIDmode)
13208 {
13209 /* We can use %d if the number is <32 bits and positive. */
13210 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13211 fprintf (file, "0x%lx%08lx",
13212 (unsigned long) CONST_DOUBLE_HIGH (x),
13213 (unsigned long) CONST_DOUBLE_LOW (x));
13214 else
13215 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13216 }
13217 else
13218 /* We can't handle floating point constants;
13219 TARGET_PRINT_OPERAND must handle them. */
13220 output_operand_lossage ("floating constant misused");
13221 break;
13222
13223 case PLUS:
13224 /* Some assemblers need integer constants to appear first. */
13225 if (CONST_INT_P (XEXP (x, 0)))
13226 {
13227 output_pic_addr_const (file, XEXP (x, 0), code);
13228 putc ('+', file);
13229 output_pic_addr_const (file, XEXP (x, 1), code);
13230 }
13231 else
13232 {
13233 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13234 output_pic_addr_const (file, XEXP (x, 1), code);
13235 putc ('+', file);
13236 output_pic_addr_const (file, XEXP (x, 0), code);
13237 }
13238 break;
13239
13240 case MINUS:
13241 if (!TARGET_MACHO)
13242 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13243 output_pic_addr_const (file, XEXP (x, 0), code);
13244 putc ('-', file);
13245 output_pic_addr_const (file, XEXP (x, 1), code);
13246 if (!TARGET_MACHO)
13247 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13248 break;
13249
13250 case UNSPEC:
13251 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13252 {
13253 bool f = i386_asm_output_addr_const_extra (file, x);
13254 gcc_assert (f);
13255 break;
13256 }
13257
13258 gcc_assert (XVECLEN (x, 0) == 1);
13259 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13260 switch (XINT (x, 1))
13261 {
13262 case UNSPEC_GOT:
13263 fputs ("@GOT", file);
13264 break;
13265 case UNSPEC_GOTOFF:
13266 fputs ("@GOTOFF", file);
13267 break;
13268 case UNSPEC_PLTOFF:
13269 fputs ("@PLTOFF", file);
13270 break;
13271 case UNSPEC_PCREL:
13272 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13273 "(%rip)" : "[rip]", file);
13274 break;
13275 case UNSPEC_GOTPCREL:
13276 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13277 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13278 break;
13279 case UNSPEC_GOTTPOFF:
13280 /* FIXME: This might be @TPOFF in Sun ld too. */
13281 fputs ("@gottpoff", file);
13282 break;
13283 case UNSPEC_TPOFF:
13284 fputs ("@tpoff", file);
13285 break;
13286 case UNSPEC_NTPOFF:
13287 if (TARGET_64BIT)
13288 fputs ("@tpoff", file);
13289 else
13290 fputs ("@ntpoff", file);
13291 break;
13292 case UNSPEC_DTPOFF:
13293 fputs ("@dtpoff", file);
13294 break;
13295 case UNSPEC_GOTNTPOFF:
13296 if (TARGET_64BIT)
13297 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13298 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13299 else
13300 fputs ("@gotntpoff", file);
13301 break;
13302 case UNSPEC_INDNTPOFF:
13303 fputs ("@indntpoff", file);
13304 break;
13305 #if TARGET_MACHO
13306 case UNSPEC_MACHOPIC_OFFSET:
13307 putc ('-', file);
13308 machopic_output_function_base_name (file);
13309 break;
13310 #endif
13311 default:
13312 output_operand_lossage ("invalid UNSPEC as operand");
13313 break;
13314 }
13315 break;
13316
13317 default:
13318 output_operand_lossage ("invalid expression as operand");
13319 }
13320 }
13321
13322 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13323 We need to emit DTP-relative relocations. */
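/* Illustrative note (not from the original source): for SIZE == 4 this
   emits something like "\t.long\tfoo@dtpoff", and for SIZE == 8 the same
   directive followed by ", 0" to pad the value to 8 bytes.  */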
13324
13325 static void ATTRIBUTE_UNUSED
13326 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13327 {
13328 fputs (ASM_LONG, file);
13329 output_addr_const (file, x);
13330 fputs ("@dtpoff", file);
13331 switch (size)
13332 {
13333 case 4:
13334 break;
13335 case 8:
13336 fputs (", 0", file);
13337 break;
13338 default:
13339 gcc_unreachable ();
13340 }
13341 }
13342
13343 /* Return true if X is a representation of the PIC register. This copes
13344 with calls from ix86_find_base_term, where the register might have
13345 been replaced by a cselib value. */
13346
13347 static bool
13348 ix86_pic_register_p (rtx x)
13349 {
13350 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13351 return (pic_offset_table_rtx
13352 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13353 else
13354 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13355 }
13356
13357 /* Helper function for ix86_delegitimize_address.
13358 Attempt to delegitimize TLS local-exec accesses. */
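/* Illustrative note (not from the original source): this recognizes a
   %fs-/%gs-relative access whose displacement is
   (const (unspec [foo] UNSPEC_NTPOFF)) and rewrites it back to plain
   "foo" (plus any base/index terms), mainly for debug output.  */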
13359
13360 static rtx
13361 ix86_delegitimize_tls_address (rtx orig_x)
13362 {
13363 rtx x = orig_x, unspec;
13364 struct ix86_address addr;
13365
13366 if (!TARGET_TLS_DIRECT_SEG_REFS)
13367 return orig_x;
13368 if (MEM_P (x))
13369 x = XEXP (x, 0);
13370 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13371 return orig_x;
13372 if (ix86_decompose_address (x, &addr) == 0
13373 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13374 || addr.disp == NULL_RTX
13375 || GET_CODE (addr.disp) != CONST)
13376 return orig_x;
13377 unspec = XEXP (addr.disp, 0);
13378 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13379 unspec = XEXP (unspec, 0);
13380 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13381 return orig_x;
13382 x = XVECEXP (unspec, 0, 0);
13383 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13384 if (unspec != XEXP (addr.disp, 0))
13385 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13386 if (addr.index)
13387 {
13388 rtx idx = addr.index;
13389 if (addr.scale != 1)
13390 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13391 x = gen_rtx_PLUS (Pmode, idx, x);
13392 }
13393 if (addr.base)
13394 x = gen_rtx_PLUS (Pmode, addr.base, x);
13395 if (MEM_P (orig_x))
13396 x = replace_equiv_address_nv (orig_x, x);
13397 return x;
13398 }
13399
13400 /* In the name of slightly smaller debug output, and to cater to
13401 general assembler lossage, recognize PIC+GOTOFF and turn it back
13402 into a direct symbol reference.
13403
13404 On Darwin, this is necessary to avoid a crash, because Darwin
13405 has a different PIC label for each routine but the DWARF debugging
13406 information is not associated with any particular routine, so it's
13407 necessary to remove references to the PIC label from RTL stored by
13408 the DWARF output code. */
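/* Illustrative note (not from the original source): in 32-bit PIC code

     (plus (reg:SI %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   is rewritten back to (symbol_ref "foo"), and a GOT load of a symbol
   similarly yields the underlying symbol_ref.  */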
13409
13410 static rtx
13411 ix86_delegitimize_address (rtx x)
13412 {
13413 rtx orig_x = delegitimize_mem_from_attrs (x);
13414 /* addend is NULL or some rtx if x is something+GOTOFF where
13415 something doesn't include the PIC register. */
13416 rtx addend = NULL_RTX;
13417 /* reg_addend is NULL or a multiple of some register. */
13418 rtx reg_addend = NULL_RTX;
13419 /* const_addend is NULL or a const_int. */
13420 rtx const_addend = NULL_RTX;
13421 /* This is the result, or NULL. */
13422 rtx result = NULL_RTX;
13423
13424 x = orig_x;
13425
13426 if (MEM_P (x))
13427 x = XEXP (x, 0);
13428
13429 if (TARGET_64BIT)
13430 {
13431 if (GET_CODE (x) != CONST
13432 || GET_CODE (XEXP (x, 0)) != UNSPEC
13433 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13434 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13435 || !MEM_P (orig_x))
13436 return ix86_delegitimize_tls_address (orig_x);
13437 x = XVECEXP (XEXP (x, 0), 0, 0);
13438 if (GET_MODE (orig_x) != Pmode)
13439 {
13440 x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
13441 if (x == NULL_RTX)
13442 return orig_x;
13443 }
13444 return x;
13445 }
13446
13447 if (GET_CODE (x) != PLUS
13448 || GET_CODE (XEXP (x, 1)) != CONST)
13449 return ix86_delegitimize_tls_address (orig_x);
13450
13451 if (ix86_pic_register_p (XEXP (x, 0)))
13452 /* %ebx + GOT/GOTOFF */
13453 ;
13454 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13455 {
13456 /* %ebx + %reg * scale + GOT/GOTOFF */
13457 reg_addend = XEXP (x, 0);
13458 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13459 reg_addend = XEXP (reg_addend, 1);
13460 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13461 reg_addend = XEXP (reg_addend, 0);
13462 else
13463 {
13464 reg_addend = NULL_RTX;
13465 addend = XEXP (x, 0);
13466 }
13467 }
13468 else
13469 addend = XEXP (x, 0);
13470
13471 x = XEXP (XEXP (x, 1), 0);
13472 if (GET_CODE (x) == PLUS
13473 && CONST_INT_P (XEXP (x, 1)))
13474 {
13475 const_addend = XEXP (x, 1);
13476 x = XEXP (x, 0);
13477 }
13478
13479 if (GET_CODE (x) == UNSPEC
13480 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13481 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13482 result = XVECEXP (x, 0, 0);
13483
13484 if (TARGET_MACHO && darwin_local_data_pic (x)
13485 && !MEM_P (orig_x))
13486 result = XVECEXP (x, 0, 0);
13487
13488 if (! result)
13489 return ix86_delegitimize_tls_address (orig_x);
13490
13491 if (const_addend)
13492 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13493 if (reg_addend)
13494 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13495 if (addend)
13496 {
13497 /* If the rest of original X doesn't involve the PIC register, add
13498 addend and subtract pic_offset_table_rtx. This can happen e.g.
13499 for code like:
13500 leal (%ebx, %ecx, 4), %ecx
13501 ...
13502 movl foo@GOTOFF(%ecx), %edx
13503 in which case we return (%ecx - %ebx) + foo. */
13504 if (pic_offset_table_rtx)
13505 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13506 pic_offset_table_rtx),
13507 result);
13508 else
13509 return orig_x;
13510 }
13511 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13512 {
13513 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13514 if (result == NULL_RTX)
13515 return orig_x;
13516 }
13517 return result;
13518 }
13519
13520 /* If X is a machine specific address (i.e. a symbol or label being
13521 referenced as a displacement from the GOT implemented using an
13522 UNSPEC), then return the base term. Otherwise return X. */
13523
13524 rtx
13525 ix86_find_base_term (rtx x)
13526 {
13527 rtx term;
13528
13529 if (TARGET_64BIT)
13530 {
13531 if (GET_CODE (x) != CONST)
13532 return x;
13533 term = XEXP (x, 0);
13534 if (GET_CODE (term) == PLUS
13535 && (CONST_INT_P (XEXP (term, 1))
13536 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13537 term = XEXP (term, 0);
13538 if (GET_CODE (term) != UNSPEC
13539 || (XINT (term, 1) != UNSPEC_GOTPCREL
13540 && XINT (term, 1) != UNSPEC_PCREL))
13541 return x;
13542
13543 return XVECEXP (term, 0, 0);
13544 }
13545
13546 return ix86_delegitimize_address (x);
13547 }
13548 \f
13549 static void
13550 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13551 int fp, FILE *file)
13552 {
13553 const char *suffix;
13554
13555 if (mode == CCFPmode || mode == CCFPUmode)
13556 {
13557 code = ix86_fp_compare_code_to_integer (code);
13558 mode = CCmode;
13559 }
13560 if (reverse)
13561 code = reverse_condition (code);
13562
13563 switch (code)
13564 {
13565 case EQ:
13566 switch (mode)
13567 {
13568 case CCAmode:
13569 suffix = "a";
13570 break;
13571
13572 case CCCmode:
13573 suffix = "c";
13574 break;
13575
13576 case CCOmode:
13577 suffix = "o";
13578 break;
13579
13580 case CCSmode:
13581 suffix = "s";
13582 break;
13583
13584 default:
13585 suffix = "e";
13586 }
13587 break;
13588 case NE:
13589 switch (mode)
13590 {
13591 case CCAmode:
13592 suffix = "na";
13593 break;
13594
13595 case CCCmode:
13596 suffix = "nc";
13597 break;
13598
13599 case CCOmode:
13600 suffix = "no";
13601 break;
13602
13603 case CCSmode:
13604 suffix = "ns";
13605 break;
13606
13607 default:
13608 suffix = "ne";
13609 }
13610 break;
13611 case GT:
13612 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13613 suffix = "g";
13614 break;
13615 case GTU:
13616 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13617 Those same assemblers have the same but opposite lossage on cmov. */
13618 if (mode == CCmode)
13619 suffix = fp ? "nbe" : "a";
13620 else if (mode == CCCmode)
13621 suffix = "b";
13622 else
13623 gcc_unreachable ();
13624 break;
13625 case LT:
13626 switch (mode)
13627 {
13628 case CCNOmode:
13629 case CCGOCmode:
13630 suffix = "s";
13631 break;
13632
13633 case CCmode:
13634 case CCGCmode:
13635 suffix = "l";
13636 break;
13637
13638 default:
13639 gcc_unreachable ();
13640 }
13641 break;
13642 case LTU:
13643 gcc_assert (mode == CCmode || mode == CCCmode);
13644 suffix = "b";
13645 break;
13646 case GE:
13647 switch (mode)
13648 {
13649 case CCNOmode:
13650 case CCGOCmode:
13651 suffix = "ns";
13652 break;
13653
13654 case CCmode:
13655 case CCGCmode:
13656 suffix = "ge";
13657 break;
13658
13659 default:
13660 gcc_unreachable ();
13661 }
13662 break;
13663 case GEU:
13664 /* ??? As above. */
13665 gcc_assert (mode == CCmode || mode == CCCmode);
13666 suffix = fp ? "nb" : "ae";
13667 break;
13668 case LE:
13669 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13670 suffix = "le";
13671 break;
13672 case LEU:
13673 /* ??? As above. */
13674 if (mode == CCmode)
13675 suffix = "be";
13676 else if (mode == CCCmode)
13677 suffix = fp ? "nb" : "ae";
13678 else
13679 gcc_unreachable ();
13680 break;
13681 case UNORDERED:
13682 suffix = fp ? "u" : "p";
13683 break;
13684 case ORDERED:
13685 suffix = fp ? "nu" : "np";
13686 break;
13687 default:
13688 gcc_unreachable ();
13689 }
13690 fputs (suffix, file);
13691 }
13692
13693 /* Print the name of register X to FILE based on its machine mode and number.
13694 If CODE is 'w', pretend the mode is HImode.
13695 If CODE is 'b', pretend the mode is QImode.
13696 If CODE is 'k', pretend the mode is SImode.
13697 If CODE is 'q', pretend the mode is DImode.
13698 If CODE is 'x', pretend the mode is V4SFmode.
13699 If CODE is 't', pretend the mode is V8SFmode.
13700 If CODE is 'h', pretend the reg is the 'high' byte register.
13701 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
13702 If CODE is 'd', duplicate the operand for AVX instruction.
13703 */
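/* Illustrative examples (not from the original source): for operand 0
   being the DImode register rax, "%k0" prints the SImode name "eax",
   "%b0" prints "al" and "%w0" prints "ax" (each with a leading '%' in
   AT&T syntax); with an SSE register operand, "%t1" prints the
   corresponding "ymmN" name.  */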
13704
13705 void
13706 print_reg (rtx x, int code, FILE *file)
13707 {
13708 const char *reg;
13709 bool duplicated = code == 'd' && TARGET_AVX;
13710
13711 gcc_assert (x == pc_rtx
13712 || (REGNO (x) != ARG_POINTER_REGNUM
13713 && REGNO (x) != FRAME_POINTER_REGNUM
13714 && REGNO (x) != FLAGS_REG
13715 && REGNO (x) != FPSR_REG
13716 && REGNO (x) != FPCR_REG));
13717
13718 if (ASSEMBLER_DIALECT == ASM_ATT)
13719 putc ('%', file);
13720
13721 if (x == pc_rtx)
13722 {
13723 gcc_assert (TARGET_64BIT);
13724 fputs ("rip", file);
13725 return;
13726 }
13727
13728 if (code == 'w' || MMX_REG_P (x))
13729 code = 2;
13730 else if (code == 'b')
13731 code = 1;
13732 else if (code == 'k')
13733 code = 4;
13734 else if (code == 'q')
13735 code = 8;
13736 else if (code == 'y')
13737 code = 3;
13738 else if (code == 'h')
13739 code = 0;
13740 else if (code == 'x')
13741 code = 16;
13742 else if (code == 't')
13743 code = 32;
13744 else
13745 code = GET_MODE_SIZE (GET_MODE (x));
13746
13747 /* Irritatingly, AMD extended registers use a different naming convention
13748 from the normal registers. */
13749 if (REX_INT_REG_P (x))
13750 {
13751 gcc_assert (TARGET_64BIT);
13752 switch (code)
13753 {
13754 case 0:
13755 error ("extended registers have no high halves");
13756 break;
13757 case 1:
13758 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13759 break;
13760 case 2:
13761 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13762 break;
13763 case 4:
13764 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13765 break;
13766 case 8:
13767 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13768 break;
13769 default:
13770 error ("unsupported operand size for extended register");
13771 break;
13772 }
13773 return;
13774 }
13775
13776 reg = NULL;
13777 switch (code)
13778 {
13779 case 3:
13780 if (STACK_TOP_P (x))
13781 {
13782 reg = "st(0)";
13783 break;
13784 }
13785 /* FALLTHRU */
13786 case 8:
13787 case 4:
13788 case 12:
13789 if (! ANY_FP_REG_P (x))
13790 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13791 /* FALLTHRU */
13792 case 16:
13793 case 2:
13794 normal:
13795 reg = hi_reg_name[REGNO (x)];
13796 break;
13797 case 1:
13798 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13799 goto normal;
13800 reg = qi_reg_name[REGNO (x)];
13801 break;
13802 case 0:
13803 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13804 goto normal;
13805 reg = qi_high_reg_name[REGNO (x)];
13806 break;
13807 case 32:
13808 if (SSE_REG_P (x))
13809 {
13810 gcc_assert (!duplicated);
13811 putc ('y', file);
13812 fputs (hi_reg_name[REGNO (x)] + 1, file);
13813 return;
13814 }
13815 break;
13816 default:
13817 gcc_unreachable ();
13818 }
13819
13820 fputs (reg, file);
13821 if (duplicated)
13822 {
13823 if (ASSEMBLER_DIALECT == ASM_ATT)
13824 fprintf (file, ", %%%s", reg);
13825 else
13826 fprintf (file, ", %s", reg);
13827 }
13828 }
13829
13830 /* Locate some local-dynamic symbol still in use by this function
13831 so that we can print its name in some tls_local_dynamic_base
13832 pattern. */
13833
13834 static int
13835 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13836 {
13837 rtx x = *px;
13838
13839 if (GET_CODE (x) == SYMBOL_REF
13840 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13841 {
13842 cfun->machine->some_ld_name = XSTR (x, 0);
13843 return 1;
13844 }
13845
13846 return 0;
13847 }
13848
13849 static const char *
13850 get_some_local_dynamic_name (void)
13851 {
13852 rtx insn;
13853
13854 if (cfun->machine->some_ld_name)
13855 return cfun->machine->some_ld_name;
13856
13857 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13858 if (NONDEBUG_INSN_P (insn)
13859 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13860 return cfun->machine->some_ld_name;
13861
13862 return NULL;
13863 }
13864
13865 /* Meaning of CODE:
13866 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13867 C -- print opcode suffix for set/cmov insn.
13868 c -- like C, but print reversed condition
13869 F,f -- likewise, but for floating-point.
13870 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13871 otherwise nothing
13872 R -- print the prefix for register names.
13873 z -- print the opcode suffix for the size of the current operand.
13874 Z -- likewise, with special suffixes for x87 instructions.
13875 * -- print a star (in certain assembler syntax)
13876 A -- print an absolute memory reference.
13877 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13878 s -- print a shift double count, followed by the assembler's argument
13879 delimiter.
13880 b -- print the QImode name of the register for the indicated operand.
13881 %b0 would print %al if operands[0] is reg 0.
13882 w -- likewise, print the HImode name of the register.
13883 k -- likewise, print the SImode name of the register.
13884 q -- likewise, print the DImode name of the register.
13885 x -- likewise, print the V4SFmode name of the register.
13886 t -- likewise, print the V8SFmode name of the register.
13887 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13888 y -- print "st(0)" instead of "st" as a register.
13889 d -- print duplicated register operand for AVX instruction.
13890 D -- print condition for SSE cmp instruction.
13891 P -- if PIC, print an @PLT suffix.
13892 X -- don't print any sort of PIC '@' suffix for a symbol.
13893 & -- print some in-use local-dynamic symbol name.
13894 H -- print a memory address offset by 8; used for sse high-parts
13895 Y -- print condition for XOP pcom* instruction.
13896 + -- print a branch hint as 'cs' or 'ds' prefix
13897 ; -- print a semicolon (after prefixes due to bug in older gas).
13898 @ -- print a segment register of thread base pointer load
13899 */
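/* Illustrative note (not from the original source): these codes appear
   in insn output templates; e.g. a template such as
   "add%z0\t{%1, %0|%0, %1}" would use %z0 to pick the b/w/l/q size
   suffix for operand 0's mode in AT&T syntax, and "%+" in a jump
   template emits a ds/cs branch hint prefix.  */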
13900
13901 void
13902 ix86_print_operand (FILE *file, rtx x, int code)
13903 {
13904 if (code)
13905 {
13906 switch (code)
13907 {
13908 case '*':
13909 if (ASSEMBLER_DIALECT == ASM_ATT)
13910 putc ('*', file);
13911 return;
13912
13913 case '&':
13914 {
13915 const char *name = get_some_local_dynamic_name ();
13916 if (name == NULL)
13917 output_operand_lossage ("'%%&' used without any "
13918 "local dynamic TLS references");
13919 else
13920 assemble_name (file, name);
13921 return;
13922 }
13923
13924 case 'A':
13925 switch (ASSEMBLER_DIALECT)
13926 {
13927 case ASM_ATT:
13928 putc ('*', file);
13929 break;
13930
13931 case ASM_INTEL:
13932 /* Intel syntax. For absolute addresses, registers should not
13933 be surrounded by brackets. */
13934 if (!REG_P (x))
13935 {
13936 putc ('[', file);
13937 ix86_print_operand (file, x, 0);
13938 putc (']', file);
13939 return;
13940 }
13941 break;
13942
13943 default:
13944 gcc_unreachable ();
13945 }
13946
13947 ix86_print_operand (file, x, 0);
13948 return;
13949
13950
13951 case 'L':
13952 if (ASSEMBLER_DIALECT == ASM_ATT)
13953 putc ('l', file);
13954 return;
13955
13956 case 'W':
13957 if (ASSEMBLER_DIALECT == ASM_ATT)
13958 putc ('w', file);
13959 return;
13960
13961 case 'B':
13962 if (ASSEMBLER_DIALECT == ASM_ATT)
13963 putc ('b', file);
13964 return;
13965
13966 case 'Q':
13967 if (ASSEMBLER_DIALECT == ASM_ATT)
13968 putc ('l', file);
13969 return;
13970
13971 case 'S':
13972 if (ASSEMBLER_DIALECT == ASM_ATT)
13973 putc ('s', file);
13974 return;
13975
13976 case 'T':
13977 if (ASSEMBLER_DIALECT == ASM_ATT)
13978 putc ('t', file);
13979 return;
13980
13981 case 'z':
13982 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13983 {
13984 /* Opcodes don't get size suffixes if using Intel opcodes. */
13985 if (ASSEMBLER_DIALECT == ASM_INTEL)
13986 return;
13987
13988 switch (GET_MODE_SIZE (GET_MODE (x)))
13989 {
13990 case 1:
13991 putc ('b', file);
13992 return;
13993
13994 case 2:
13995 putc ('w', file);
13996 return;
13997
13998 case 4:
13999 putc ('l', file);
14000 return;
14001
14002 case 8:
14003 putc ('q', file);
14004 return;
14005
14006 default:
14007 output_operand_lossage
14008 ("invalid operand size for operand code '%c'", code);
14009 return;
14010 }
14011 }
14012
14013 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14014 warning
14015 (0, "non-integer operand used with operand code '%c'", code);
14016 /* FALLTHRU */
14017
14018 case 'Z':
14019 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14020 if (ASSEMBLER_DIALECT == ASM_INTEL)
14021 return;
14022
14023 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14024 {
14025 switch (GET_MODE_SIZE (GET_MODE (x)))
14026 {
14027 case 2:
14028 #ifdef HAVE_AS_IX86_FILDS
14029 putc ('s', file);
14030 #endif
14031 return;
14032
14033 case 4:
14034 putc ('l', file);
14035 return;
14036
14037 case 8:
14038 #ifdef HAVE_AS_IX86_FILDQ
14039 putc ('q', file);
14040 #else
14041 fputs ("ll", file);
14042 #endif
14043 return;
14044
14045 default:
14046 break;
14047 }
14048 }
14049 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14050 {
14051 /* 387 opcodes don't get size suffixes
14052 if the operands are registers. */
14053 if (STACK_REG_P (x))
14054 return;
14055
14056 switch (GET_MODE_SIZE (GET_MODE (x)))
14057 {
14058 case 4:
14059 putc ('s', file);
14060 return;
14061
14062 case 8:
14063 putc ('l', file);
14064 return;
14065
14066 case 12:
14067 case 16:
14068 putc ('t', file);
14069 return;
14070
14071 default:
14072 break;
14073 }
14074 }
14075 else
14076 {
14077 output_operand_lossage
14078 ("invalid operand type used with operand code '%c'", code);
14079 return;
14080 }
14081
14082 output_operand_lossage
14083 ("invalid operand size for operand code '%c'", code);
14084 return;
14085
14086 case 'd':
14087 case 'b':
14088 case 'w':
14089 case 'k':
14090 case 'q':
14091 case 'h':
14092 case 't':
14093 case 'y':
14094 case 'x':
14095 case 'X':
14096 case 'P':
14097 break;
14098
14099 case 's':
14100 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14101 {
14102 ix86_print_operand (file, x, 0);
14103 fputs (", ", file);
14104 }
14105 return;
14106
14107 case 'D':
14108 /* A little bit of braindamage here. The SSE compare instructions
14109 use completely different names for the comparisons than the
14110 fp conditional moves do. */
14111 if (TARGET_AVX)
14112 {
14113 switch (GET_CODE (x))
14114 {
14115 case EQ:
14116 fputs ("eq", file);
14117 break;
14118 case UNEQ:
14119 fputs ("eq_us", file);
14120 break;
14121 case LT:
14122 fputs ("lt", file);
14123 break;
14124 case UNLT:
14125 fputs ("nge", file);
14126 break;
14127 case LE:
14128 fputs ("le", file);
14129 break;
14130 case UNLE:
14131 fputs ("ngt", file);
14132 break;
14133 case UNORDERED:
14134 fputs ("unord", file);
14135 break;
14136 case NE:
14137 fputs ("neq", file);
14138 break;
14139 case LTGT:
14140 fputs ("neq_oq", file);
14141 break;
14142 case GE:
14143 fputs ("ge", file);
14144 break;
14145 case UNGE:
14146 fputs ("nlt", file);
14147 break;
14148 case GT:
14149 fputs ("gt", file);
14150 break;
14151 case UNGT:
14152 fputs ("nle", file);
14153 break;
14154 case ORDERED:
14155 fputs ("ord", file);
14156 break;
14157 default:
14158 output_operand_lossage ("operand is not a condition code, "
14159 "invalid operand code 'D'");
14160 return;
14161 }
14162 }
14163 else
14164 {
14165 switch (GET_CODE (x))
14166 {
14167 case EQ:
14168 case UNEQ:
14169 fputs ("eq", file);
14170 break;
14171 case LT:
14172 case UNLT:
14173 fputs ("lt", file);
14174 break;
14175 case LE:
14176 case UNLE:
14177 fputs ("le", file);
14178 break;
14179 case UNORDERED:
14180 fputs ("unord", file);
14181 break;
14182 case NE:
14183 case LTGT:
14184 fputs ("neq", file);
14185 break;
14186 case UNGE:
14187 case GE:
14188 fputs ("nlt", file);
14189 break;
14190 case UNGT:
14191 case GT:
14192 fputs ("nle", file);
14193 break;
14194 case ORDERED:
14195 fputs ("ord", file);
14196 break;
14197 default:
14198 output_operand_lossage ("operand is not a condition code, "
14199 "invalid operand code 'D'");
14200 return;
14201 }
14202 }
14203 return;
14204 case 'O':
14205 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14206 if (ASSEMBLER_DIALECT == ASM_ATT)
14207 {
14208 switch (GET_MODE (x))
14209 {
14210 case HImode: putc ('w', file); break;
14211 case SImode:
14212 case SFmode: putc ('l', file); break;
14213 case DImode:
14214 case DFmode: putc ('q', file); break;
14215 default: gcc_unreachable ();
14216 }
14217 putc ('.', file);
14218 }
14219 #endif
14220 return;
14221 case 'C':
14222 if (!COMPARISON_P (x))
14223 {
14224 output_operand_lossage ("operand is neither a constant nor a "
14225 "condition code, invalid operand code "
14226 "'C'");
14227 return;
14228 }
14229 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14230 return;
14231 case 'F':
14232 if (!COMPARISON_P (x))
14233 {
14234 output_operand_lossage ("operand is neither a constant nor a "
14235 "condition code, invalid operand code "
14236 "'F'");
14237 return;
14238 }
14239 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14240 if (ASSEMBLER_DIALECT == ASM_ATT)
14241 putc ('.', file);
14242 #endif
14243 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14244 return;
14245
14246 /* Like above, but with the condition reversed. */
14247 case 'c':
14248 /* Check to see if argument to %c is really a constant
14249 and not a condition code which needs to be reversed. */
14250 if (!COMPARISON_P (x))
14251 {
14252 output_operand_lossage ("operand is neither a constant nor a "
14253 "condition code, invalid operand "
14254 "code 'c'");
14255 return;
14256 }
14257 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14258 return;
14259 case 'f':
14260 if (!COMPARISON_P (x))
14261 {
14262 output_operand_lossage ("operand is neither a constant nor a "
14263 "condition code, invalid operand "
14264 "code 'f'");
14265 return;
14266 }
14267 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14268 if (ASSEMBLER_DIALECT == ASM_ATT)
14269 putc ('.', file);
14270 #endif
14271 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14272 return;
14273
14274 case 'H':
14275 /* It doesn't actually matter what mode we use here, as we're
14276 only going to use this for printing. */
14277 x = adjust_address_nv (x, DImode, 8);
14278 break;
14279
14280 case '+':
14281 {
14282 rtx x;
14283
14284 if (!optimize
14285 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14286 return;
14287
14288 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14289 if (x)
14290 {
14291 int pred_val = INTVAL (XEXP (x, 0));
14292
14293 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14294 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14295 {
14296 int taken = pred_val > REG_BR_PROB_BASE / 2;
14297 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14298
14299 /* Emit hints only in the case default branch prediction
14300 heuristics would fail. */
14301 if (taken != cputaken)
14302 {
14303 /* We use 3e (DS) prefix for taken branches and
14304 2e (CS) prefix for not taken branches. */
14305 if (taken)
14306 fputs ("ds ; ", file);
14307 else
14308 fputs ("cs ; ", file);
14309 }
14310 }
14311 }
14312 return;
14313 }
14314
14315 case 'Y':
14316 switch (GET_CODE (x))
14317 {
14318 case NE:
14319 fputs ("neq", file);
14320 break;
14321 case EQ:
14322 fputs ("eq", file);
14323 break;
14324 case GE:
14325 case GEU:
14326 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14327 break;
14328 case GT:
14329 case GTU:
14330 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14331 break;
14332 case LE:
14333 case LEU:
14334 fputs ("le", file);
14335 break;
14336 case LT:
14337 case LTU:
14338 fputs ("lt", file);
14339 break;
14340 case UNORDERED:
14341 fputs ("unord", file);
14342 break;
14343 case ORDERED:
14344 fputs ("ord", file);
14345 break;
14346 case UNEQ:
14347 fputs ("ueq", file);
14348 break;
14349 case UNGE:
14350 fputs ("nlt", file);
14351 break;
14352 case UNGT:
14353 fputs ("nle", file);
14354 break;
14355 case UNLE:
14356 fputs ("ule", file);
14357 break;
14358 case UNLT:
14359 fputs ("ult", file);
14360 break;
14361 case LTGT:
14362 fputs ("une", file);
14363 break;
14364 default:
14365 output_operand_lossage ("operand is not a condition code, "
14366 "invalid operand code 'Y'");
14367 return;
14368 }
14369 return;
14370
14371 case ';':
14372 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14373 putc (';', file);
14374 #endif
14375 return;
14376
14377 case '@':
14378 if (ASSEMBLER_DIALECT == ASM_ATT)
14379 putc ('%', file);
14380
14381 /* The kernel uses a different segment register for performance
14382 reasons; that way a system call does not have to trash the userspace
14383 segment register, which would be expensive. */
14384 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14385 fputs ("fs", file);
14386 else
14387 fputs ("gs", file);
14388 return;
14389
14390 default:
14391 output_operand_lossage ("invalid operand code '%c'", code);
14392 }
14393 }
14394
14395 if (REG_P (x))
14396 print_reg (x, code, file);
14397
14398 else if (MEM_P (x))
14399 {
14400 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14401 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14402 && GET_MODE (x) != BLKmode)
14403 {
14404 const char * size;
14405 switch (GET_MODE_SIZE (GET_MODE (x)))
14406 {
14407 case 1: size = "BYTE"; break;
14408 case 2: size = "WORD"; break;
14409 case 4: size = "DWORD"; break;
14410 case 8: size = "QWORD"; break;
14411 case 12: size = "TBYTE"; break;
14412 case 16:
14413 if (GET_MODE (x) == XFmode)
14414 size = "TBYTE";
14415 else
14416 size = "XMMWORD";
14417 break;
14418 case 32: size = "YMMWORD"; break;
14419 default:
14420 gcc_unreachable ();
14421 }
14422
14423 /* Check for explicit size override (codes 'b', 'w' and 'k'). */
14424 if (code == 'b')
14425 size = "BYTE";
14426 else if (code == 'w')
14427 size = "WORD";
14428 else if (code == 'k')
14429 size = "DWORD";
14430
14431 fputs (size, file);
14432 fputs (" PTR ", file);
14433 }
14434
14435 x = XEXP (x, 0);
14436 /* Avoid (%rip) for call operands. */
14437 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14438 && !CONST_INT_P (x))
14439 output_addr_const (file, x);
14440 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14441 output_operand_lossage ("invalid constraints for operand");
14442 else
14443 output_address (x);
14444 }
14445
14446 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14447 {
14448 REAL_VALUE_TYPE r;
14449 long l;
14450
14451 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14452 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14453
14454 if (ASSEMBLER_DIALECT == ASM_ATT)
14455 putc ('$', file);
14456 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14457 if (code == 'q')
14458 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14459 else
14460 fprintf (file, "0x%08x", (unsigned int) l);
14461 }
14462
14463 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14464 {
14465 REAL_VALUE_TYPE r;
14466 long l[2];
14467
14468 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14469 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14470
14471 if (ASSEMBLER_DIALECT == ASM_ATT)
14472 putc ('$', file);
14473 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14474 }
14475
14476 /* These float cases don't actually occur as immediate operands. */
14477 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14478 {
14479 char dstr[30];
14480
14481 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14482 fputs (dstr, file);
14483 }
14484
14485 else
14486 {
14487 /* We have patterns that allow zero sets of memory, for instance.
14488 In 64-bit mode, we should probably support all 8-byte vectors,
14489 since we can in fact encode that into an immediate. */
14490 if (GET_CODE (x) == CONST_VECTOR)
14491 {
14492 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14493 x = const0_rtx;
14494 }
14495
14496 if (code != 'P')
14497 {
14498 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14499 {
14500 if (ASSEMBLER_DIALECT == ASM_ATT)
14501 putc ('$', file);
14502 }
14503 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14504 || GET_CODE (x) == LABEL_REF)
14505 {
14506 if (ASSEMBLER_DIALECT == ASM_ATT)
14507 putc ('$', file);
14508 else
14509 fputs ("OFFSET FLAT:", file);
14510 }
14511 }
14512 if (CONST_INT_P (x))
14513 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14514 else if (flag_pic || MACHOPIC_INDIRECT)
14515 output_pic_addr_const (file, x, code);
14516 else
14517 output_addr_const (file, x);
14518 }
14519 }
14520
14521 static bool
14522 ix86_print_operand_punct_valid_p (unsigned char code)
14523 {
14524 return (code == '@' || code == '*' || code == '+'
14525 || code == '&' || code == ';');
14526 }
14527 \f
14528 /* Print a memory operand whose address is ADDR. */
14529
14530 static void
14531 ix86_print_operand_address (FILE *file, rtx addr)
14532 {
14533 struct ix86_address parts;
14534 rtx base, index, disp;
14535 int scale;
14536 int ok = ix86_decompose_address (addr, &parts);
14537
14538 gcc_assert (ok);
14539
14540 base = parts.base;
14541 index = parts.index;
14542 disp = parts.disp;
14543 scale = parts.scale;
14544
14545 switch (parts.seg)
14546 {
14547 case SEG_DEFAULT:
14548 break;
14549 case SEG_FS:
14550 case SEG_GS:
14551 if (ASSEMBLER_DIALECT == ASM_ATT)
14552 putc ('%', file);
14553 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14554 break;
14555 default:
14556 gcc_unreachable ();
14557 }
14558
14559 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
14560 if (TARGET_64BIT && !base && !index)
14561 {
14562 rtx symbol = disp;
14563
14564 if (GET_CODE (disp) == CONST
14565 && GET_CODE (XEXP (disp, 0)) == PLUS
14566 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14567 symbol = XEXP (XEXP (disp, 0), 0);
14568
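      /* If the displacement is a label or a non-TLS symbol, use pc_rtx as
	 the base register; print_reg below then emits "rip", giving the
	 RIP-relative form of the address.  */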
14569 if (GET_CODE (symbol) == LABEL_REF
14570 || (GET_CODE (symbol) == SYMBOL_REF
14571 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14572 base = pc_rtx;
14573 }
14574 if (!base && !index)
14575 {
14576 /* A displacement-only address requires special attention. */
14577
14578 if (CONST_INT_P (disp))
14579 {
14580 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14581 fputs ("ds:", file);
14582 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14583 }
14584 else if (flag_pic)
14585 output_pic_addr_const (file, disp, 0);
14586 else
14587 output_addr_const (file, disp);
14588 }
14589 else
14590 {
14591 if (ASSEMBLER_DIALECT == ASM_ATT)
14592 {
14593 if (disp)
14594 {
14595 if (flag_pic)
14596 output_pic_addr_const (file, disp, 0);
14597 else if (GET_CODE (disp) == LABEL_REF)
14598 output_asm_label (disp);
14599 else
14600 output_addr_const (file, disp);
14601 }
14602
14603 putc ('(', file);
14604 if (base)
14605 print_reg (base, 0, file);
14606 if (index)
14607 {
14608 putc (',', file);
14609 print_reg (index, 0, file);
14610 if (scale != 1)
14611 fprintf (file, ",%d", scale);
14612 }
14613 putc (')', file);
14614 }
14615 else
14616 {
14617 rtx offset = NULL_RTX;
14618
14619 if (disp)
14620 {
14621 /* Pull out the offset of a symbol; print any symbol itself. */
14622 if (GET_CODE (disp) == CONST
14623 && GET_CODE (XEXP (disp, 0)) == PLUS
14624 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14625 {
14626 offset = XEXP (XEXP (disp, 0), 1);
14627 disp = gen_rtx_CONST (VOIDmode,
14628 XEXP (XEXP (disp, 0), 0));
14629 }
14630
14631 if (flag_pic)
14632 output_pic_addr_const (file, disp, 0);
14633 else if (GET_CODE (disp) == LABEL_REF)
14634 output_asm_label (disp);
14635 else if (CONST_INT_P (disp))
14636 offset = disp;
14637 else
14638 output_addr_const (file, disp);
14639 }
14640
14641 putc ('[', file);
14642 if (base)
14643 {
14644 print_reg (base, 0, file);
14645 if (offset)
14646 {
14647 if (INTVAL (offset) >= 0)
14648 putc ('+', file);
14649 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14650 }
14651 }
14652 else if (offset)
14653 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14654 else
14655 putc ('0', file);
14656
14657 if (index)
14658 {
14659 putc ('+', file);
14660 print_reg (index, 0, file);
14661 if (scale != 1)
14662 fprintf (file, "*%d", scale);
14663 }
14664 putc (']', file);
14665 }
14666 }
14667 }
14668
14669 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14670
14671 static bool
14672 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14673 {
14674 rtx op;
14675
14676 if (GET_CODE (x) != UNSPEC)
14677 return false;
14678
14679 op = XVECEXP (x, 0, 0);
14680 switch (XINT (x, 1))
14681 {
14682 case UNSPEC_GOTTPOFF:
14683 output_addr_const (file, op);
14684 /* FIXME: This might be @TPOFF in Sun ld. */
14685 fputs ("@gottpoff", file);
14686 break;
14687 case UNSPEC_TPOFF:
14688 output_addr_const (file, op);
14689 fputs ("@tpoff", file);
14690 break;
14691 case UNSPEC_NTPOFF:
14692 output_addr_const (file, op);
14693 if (TARGET_64BIT)
14694 fputs ("@tpoff", file);
14695 else
14696 fputs ("@ntpoff", file);
14697 break;
14698 case UNSPEC_DTPOFF:
14699 output_addr_const (file, op);
14700 fputs ("@dtpoff", file);
14701 break;
14702 case UNSPEC_GOTNTPOFF:
14703 output_addr_const (file, op);
14704 if (TARGET_64BIT)
14705 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14706 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14707 else
14708 fputs ("@gotntpoff", file);
14709 break;
14710 case UNSPEC_INDNTPOFF:
14711 output_addr_const (file, op);
14712 fputs ("@indntpoff", file);
14713 break;
14714 #if TARGET_MACHO
14715 case UNSPEC_MACHOPIC_OFFSET:
14716 output_addr_const (file, op);
14717 putc ('-', file);
14718 machopic_output_function_base_name (file);
14719 break;
14720 #endif
14721
14722 case UNSPEC_STACK_CHECK:
14723 {
14724 int offset;
14725
14726 gcc_assert (flag_split_stack);
14727
14728 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14729 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14730 #else
14731 gcc_unreachable ();
14732 #endif
14733
14734 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14735 }
14736 break;
14737
14738 default:
14739 return false;
14740 }
14741
14742 return true;
14743 }
14744 \f
14745 /* Split one or more double-mode RTL references into pairs of half-mode
14746 references. The RTL can be REG, offsettable MEM, integer constant, or
14747 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14748 split and "num" is its length. lo_half and hi_half are output arrays
14749 that parallel "operands". */
14750
14751 void
14752 split_double_mode (enum machine_mode mode, rtx operands[],
14753 int num, rtx lo_half[], rtx hi_half[])
14754 {
14755 enum machine_mode half_mode;
14756 unsigned int byte;
14757
14758 switch (mode)
14759 {
14760 case TImode:
14761 half_mode = DImode;
14762 break;
14763 case DImode:
14764 half_mode = SImode;
14765 break;
14766 default:
14767 gcc_unreachable ();
14768 }
14769
14770 byte = GET_MODE_SIZE (half_mode);
14771
14772 while (num--)
14773 {
14774 rtx op = operands[num];
14775
14776 /* simplify_subreg refuses to split volatile memory addresses,
14777 but we still have to handle them. */
14778 if (MEM_P (op))
14779 {
14780 lo_half[num] = adjust_address (op, half_mode, 0);
14781 hi_half[num] = adjust_address (op, half_mode, byte);
14782 }
14783 else
14784 {
14785 lo_half[num] = simplify_gen_subreg (half_mode, op,
14786 GET_MODE (op) == VOIDmode
14787 ? mode : GET_MODE (op), 0);
14788 hi_half[num] = simplify_gen_subreg (half_mode, op,
14789 GET_MODE (op) == VOIDmode
14790 ? mode : GET_MODE (op), byte);
14791 }
14792 }
14793 }
14794 \f
14795 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14796 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14797 is the expression of the binary operation. The output may either be
14798 emitted here, or returned to the caller, like all output_* functions.
14799
14800 There is no guarantee that the operands are the same mode, as they
14801 might be within FLOAT or FLOAT_EXTEND expressions. */
14802
14803 #ifndef SYSV386_COMPAT
14804 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14805 wants to fix the assemblers because that causes incompatibility
14806 with gcc. No-one wants to fix gcc because that causes
14807 incompatibility with assemblers... You can use -DSYSV386_COMPAT=0 if
14808 you recompile both gcc and gas this way. */
14809 #define SYSV386_COMPAT 1
14810 #endif
14811
14812 const char *
14813 output_387_binary_op (rtx insn, rtx *operands)
14814 {
14815 static char buf[40];
14816 const char *p;
14817 const char *ssep;
14818 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14819
14820 #ifdef ENABLE_CHECKING
14821 /* Even if we do not want to check the inputs, this documents input
14822 constraints. Which helps in understanding the following code. */
14823 if (STACK_REG_P (operands[0])
14824 && ((REG_P (operands[1])
14825 && REGNO (operands[0]) == REGNO (operands[1])
14826 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14827 || (REG_P (operands[2])
14828 && REGNO (operands[0]) == REGNO (operands[2])
14829 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14830 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14831 ; /* ok */
14832 else
14833 gcc_assert (is_sse);
14834 #endif
14835
14836 switch (GET_CODE (operands[3]))
14837 {
14838 case PLUS:
14839 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14840 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14841 p = "fiadd";
14842 else
14843 p = "fadd";
14844 ssep = "vadd";
14845 break;
14846
14847 case MINUS:
14848 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14849 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14850 p = "fisub";
14851 else
14852 p = "fsub";
14853 ssep = "vsub";
14854 break;
14855
14856 case MULT:
14857 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14858 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14859 p = "fimul";
14860 else
14861 p = "fmul";
14862 ssep = "vmul";
14863 break;
14864
14865 case DIV:
14866 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14867 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14868 p = "fidiv";
14869 else
14870 p = "fdiv";
14871 ssep = "vdiv";
14872 break;
14873
14874 default:
14875 gcc_unreachable ();
14876 }
14877
14878 if (is_sse)
14879 {
14880 if (TARGET_AVX)
14881 {
14882 strcpy (buf, ssep);
14883 if (GET_MODE (operands[0]) == SFmode)
14884 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14885 else
14886 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14887 }
14888 else
14889 {
14890 strcpy (buf, ssep + 1);
14891 if (GET_MODE (operands[0]) == SFmode)
14892 strcat (buf, "ss\t{%2, %0|%0, %2}");
14893 else
14894 strcat (buf, "sd\t{%2, %0|%0, %2}");
14895 }
14896 return buf;
14897 }
14898 strcpy (buf, p);
14899
14900 switch (GET_CODE (operands[3]))
14901 {
14902 case MULT:
14903 case PLUS:
14904 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14905 {
14906 rtx temp = operands[2];
14907 operands[2] = operands[1];
14908 operands[1] = temp;
14909 }
14910
14911 /* Now we know that operands[0] == operands[1]. */
14912
14913 if (MEM_P (operands[2]))
14914 {
14915 p = "%Z2\t%2";
14916 break;
14917 }
14918
14919 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14920 {
14921 if (STACK_TOP_P (operands[0]))
14922 /* How is it that we are storing to a dead operand[2]?
14923 Well, presumably operands[1] is dead too. We can't
14924 store the result to st(0) as st(0) gets popped on this
14925 instruction. Instead store to operands[2] (which I
14926 think has to be st(1)). st(1) will be popped later.
14927 gcc <= 2.8.1 didn't have this check and generated
14928 assembly code that the Unixware assembler rejected. */
14929 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14930 else
14931 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14932 break;
14933 }
14934
14935 if (STACK_TOP_P (operands[0]))
14936 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14937 else
14938 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14939 break;
14940
14941 case MINUS:
14942 case DIV:
14943 if (MEM_P (operands[1]))
14944 {
14945 p = "r%Z1\t%1";
14946 break;
14947 }
14948
14949 if (MEM_P (operands[2]))
14950 {
14951 p = "%Z2\t%2";
14952 break;
14953 }
14954
14955 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14956 {
14957 #if SYSV386_COMPAT
14958 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14959 derived assemblers, confusingly reverse the direction of
14960 the operation for fsub{r} and fdiv{r} when the
14961 destination register is not st(0). The Intel assembler
14962 doesn't have this brain damage. Read !SYSV386_COMPAT to
14963 figure out what the hardware really does. */
14964 if (STACK_TOP_P (operands[0]))
14965 p = "{p\t%0, %2|rp\t%2, %0}";
14966 else
14967 p = "{rp\t%2, %0|p\t%0, %2}";
14968 #else
14969 if (STACK_TOP_P (operands[0]))
14970 /* As above for fmul/fadd, we can't store to st(0). */
14971 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14972 else
14973 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14974 #endif
14975 break;
14976 }
14977
14978 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14979 {
14980 #if SYSV386_COMPAT
14981 if (STACK_TOP_P (operands[0]))
14982 p = "{rp\t%0, %1|p\t%1, %0}";
14983 else
14984 p = "{p\t%1, %0|rp\t%0, %1}";
14985 #else
14986 if (STACK_TOP_P (operands[0]))
14987 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14988 else
14989 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14990 #endif
14991 break;
14992 }
14993
14994 if (STACK_TOP_P (operands[0]))
14995 {
14996 if (STACK_TOP_P (operands[1]))
14997 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14998 else
14999 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15000 break;
15001 }
15002 else if (STACK_TOP_P (operands[1]))
15003 {
15004 #if SYSV386_COMPAT
15005 p = "{\t%1, %0|r\t%0, %1}";
15006 #else
15007 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15008 #endif
15009 }
15010 else
15011 {
15012 #if SYSV386_COMPAT
15013 p = "{r\t%2, %0|\t%0, %2}";
15014 #else
15015 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15016 #endif
15017 }
15018 break;
15019
15020 default:
15021 gcc_unreachable ();
15022 }
15023
15024 strcat (buf, p);
15025 return buf;
15026 }
15027
15028 /* Return needed mode for entity in optimize_mode_switching pass. */
15029
15030 int
15031 ix86_mode_needed (int entity, rtx insn)
15032 {
15033 enum attr_i387_cw mode;
15034
15035 /* The mode UNINITIALIZED is used to store the control word after a
15036 function call or ASM pattern. The mode ANY specifies that the
15037 function has no requirements on the control word and makes no
15038 changes in the bits we are interested in. */
15039
15040 if (CALL_P (insn)
15041 || (NONJUMP_INSN_P (insn)
15042 && (asm_noperands (PATTERN (insn)) >= 0
15043 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15044 return I387_CW_UNINITIALIZED;
15045
15046 if (recog_memoized (insn) < 0)
15047 return I387_CW_ANY;
15048
15049 mode = get_attr_i387_cw (insn);
15050
15051 switch (entity)
15052 {
15053 case I387_TRUNC:
15054 if (mode == I387_CW_TRUNC)
15055 return mode;
15056 break;
15057
15058 case I387_FLOOR:
15059 if (mode == I387_CW_FLOOR)
15060 return mode;
15061 break;
15062
15063 case I387_CEIL:
15064 if (mode == I387_CW_CEIL)
15065 return mode;
15066 break;
15067
15068 case I387_MASK_PM:
15069 if (mode == I387_CW_MASK_PM)
15070 return mode;
15071 break;
15072
15073 default:
15074 gcc_unreachable ();
15075 }
15076
15077 return I387_CW_ANY;
15078 }
15079
15080 /* Output code to initialize the control word copies used by the
15081 trunc?f?i and rounding patterns. The current control word is saved,
15082 and a copy modified for rounding mode MODE is stored in a stack slot. */
15083
15084 void
15085 emit_i387_cw_initialization (int mode)
15086 {
15087 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15088 rtx new_mode;
15089
15090 enum ix86_stack_slot slot;
15091
15092 rtx reg = gen_reg_rtx (HImode);
15093
15094 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15095 emit_move_insn (reg, copy_rtx (stored_mode));
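  /* Bits 10 and 11 of the x87 control word form the rounding-control
     field (00 = nearest, 01 = down, 10 = up, 11 = truncate), and bit 5
     (0x0020) masks the precision exception; the constants used below set
     those bits, either directly or by inserting into the high byte.  */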
15096
15097 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15098 || optimize_function_for_size_p (cfun))
15099 {
15100 switch (mode)
15101 {
15102 case I387_CW_TRUNC:
15103 /* round toward zero (truncate) */
15104 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15105 slot = SLOT_CW_TRUNC;
15106 break;
15107
15108 case I387_CW_FLOOR:
15109 /* round down toward -oo */
15110 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15111 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15112 slot = SLOT_CW_FLOOR;
15113 break;
15114
15115 case I387_CW_CEIL:
15116 /* round up toward +oo */
15117 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15118 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15119 slot = SLOT_CW_CEIL;
15120 break;
15121
15122 case I387_CW_MASK_PM:
15123 /* mask precision exception for nearbyint() */
15124 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15125 slot = SLOT_CW_MASK_PM;
15126 break;
15127
15128 default:
15129 gcc_unreachable ();
15130 }
15131 }
15132 else
15133 {
15134 switch (mode)
15135 {
15136 case I387_CW_TRUNC:
15137 /* round toward zero (truncate) */
15138 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15139 slot = SLOT_CW_TRUNC;
15140 break;
15141
15142 case I387_CW_FLOOR:
15143 /* round down toward -oo */
15144 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15145 slot = SLOT_CW_FLOOR;
15146 break;
15147
15148 case I387_CW_CEIL:
15149 /* round up toward +oo */
15150 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15151 slot = SLOT_CW_CEIL;
15152 break;
15153
15154 case I387_CW_MASK_PM:
15155 /* mask precision exception for nearbyint() */
15156 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15157 slot = SLOT_CW_MASK_PM;
15158 break;
15159
15160 default:
15161 gcc_unreachable ();
15162 }
15163 }
15164
15165 gcc_assert (slot < MAX_386_STACK_LOCALS);
15166
15167 new_mode = assign_386_stack_local (HImode, slot);
15168 emit_move_insn (new_mode, reg);
15169 }
15170
15171 /* Output code for INSN to convert a float to a signed int. OPERANDS
15172 are the insn operands. The output may be [HSD]Imode and the input
15173 operand may be [SDX]Fmode. */
15174
15175 const char *
15176 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
15177 {
15178 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15179 int dimode_p = GET_MODE (operands[0]) == DImode;
15180 int round_mode = get_attr_i387_cw (insn);
15181
15182 /* Jump through a hoop or two for DImode, since the hardware has no
15183 non-popping instruction. We used to do this a different way, but
15184 that was somewhat fragile and broke with post-reload splitters. */
15185 if ((dimode_p || fisttp) && !stack_top_dies)
15186 output_asm_insn ("fld\t%y1", operands);
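  /* The "fld %y1" above pushes a copy of the operand, so the popping
     fistp/fisttp below consumes the copy and the original value stays
     live on the x87 stack.  */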
15187
15188 gcc_assert (STACK_TOP_P (operands[1]));
15189 gcc_assert (MEM_P (operands[0]));
15190 gcc_assert (GET_MODE (operands[1]) != TFmode);
15191
15192 if (fisttp)
15193 output_asm_insn ("fisttp%Z0\t%0", operands);
15194 else
15195 {
15196 if (round_mode != I387_CW_ANY)
15197 output_asm_insn ("fldcw\t%3", operands);
15198 if (stack_top_dies || dimode_p)
15199 output_asm_insn ("fistp%Z0\t%0", operands);
15200 else
15201 output_asm_insn ("fist%Z0\t%0", operands);
15202 if (round_mode != I387_CW_ANY)
15203 output_asm_insn ("fldcw\t%2", operands);
15204 }
15205
15206 return "";
15207 }
15208
15209 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15210 have the values zero or one, indicates the ffreep insn's operand
15211 from the OPERANDS array. */
15212
15213 static const char *
15214 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15215 {
15216 if (TARGET_USE_FFREEP)
15217 #ifdef HAVE_AS_IX86_FFREEP
15218 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15219 #else
15220 {
15221 static char retval[32];
15222 int regno = REGNO (operands[opno]);
15223
15224 gcc_assert (FP_REGNO_P (regno));
15225
15226 regno -= FIRST_STACK_REG;
15227
15228 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15229 return retval;
15230 }
15231 #endif
15232
15233 return opno ? "fstp\t%y1" : "fstp\t%y0";
15234 }
15235
15236
15237 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15238 should be used. UNORDERED_P is true when fucom should be used. */
15239
15240 const char *
15241 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
15242 {
15243 int stack_top_dies;
15244 rtx cmp_op0, cmp_op1;
15245 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15246
15247 if (eflags_p)
15248 {
15249 cmp_op0 = operands[0];
15250 cmp_op1 = operands[1];
15251 }
15252 else
15253 {
15254 cmp_op0 = operands[1];
15255 cmp_op1 = operands[2];
15256 }
15257
15258 if (is_sse)
15259 {
15260 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
15261 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
15262 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
15263 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
15264
15265 if (GET_MODE (operands[0]) == SFmode)
15266 if (unordered_p)
15267 return &ucomiss[TARGET_AVX ? 0 : 1];
15268 else
15269 return &comiss[TARGET_AVX ? 0 : 1];
15270 else
15271 if (unordered_p)
15272 return &ucomisd[TARGET_AVX ? 0 : 1];
15273 else
15274 return &comisd[TARGET_AVX ? 0 : 1];
15275 }
15276
15277 gcc_assert (STACK_TOP_P (cmp_op0));
15278
15279 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15280
15281 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15282 {
15283 if (stack_top_dies)
15284 {
15285 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15286 return output_387_ffreep (operands, 1);
15287 }
15288 else
15289 return "ftst\n\tfnstsw\t%0";
15290 }
15291
15292 if (STACK_REG_P (cmp_op1)
15293 && stack_top_dies
15294 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15295 && REGNO (cmp_op1) != FIRST_STACK_REG)
15296 {
15297 /* If both the top of the 387 stack and the other operand (which is
15298 also a stack register) die, then this must be a `fcompp' float
15299 compare. */
15300
15301 if (eflags_p)
15302 {
15303 /* There is no double popping fcomi variant. Fortunately,
15304 eflags is immune from the fstp's cc clobbering. */
15305 if (unordered_p)
15306 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15307 else
15308 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15309 return output_387_ffreep (operands, 0);
15310 }
15311 else
15312 {
15313 if (unordered_p)
15314 return "fucompp\n\tfnstsw\t%0";
15315 else
15316 return "fcompp\n\tfnstsw\t%0";
15317 }
15318 }
15319 else
15320 {
15321 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15322
15323 static const char * const alt[16] =
15324 {
15325 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15326 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15327 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15328 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15329
15330 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15331 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15332 NULL,
15333 NULL,
15334
15335 "fcomi\t{%y1, %0|%0, %y1}",
15336 "fcomip\t{%y1, %0|%0, %y1}",
15337 "fucomi\t{%y1, %0|%0, %y1}",
15338 "fucomip\t{%y1, %0|%0, %y1}",
15339
15340 NULL,
15341 NULL,
15342 NULL,
15343 NULL
15344 };
15345
15346 int mask;
15347 const char *ret;
15348
15349 mask = eflags_p << 3;
15350 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15351 mask |= unordered_p << 1;
15352 mask |= stack_top_dies;
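      /* The resulting mask indexes the alt[] table above: bit 3 is
	 eflags_p, bit 2 is set for an integer-mode operand, bit 1 is
	 unordered_p and bit 0 is stack_top_dies.  */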
15353
15354 gcc_assert (mask < 16);
15355 ret = alt[mask];
15356 gcc_assert (ret);
15357
15358 return ret;
15359 }
15360 }
15361
15362 void
15363 ix86_output_addr_vec_elt (FILE *file, int value)
15364 {
15365 const char *directive = ASM_LONG;
15366
15367 #ifdef ASM_QUAD
15368 if (TARGET_64BIT)
15369 directive = ASM_QUAD;
15370 #else
15371 gcc_assert (!TARGET_64BIT);
15372 #endif
15373
15374 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15375 }
15376
15377 void
15378 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15379 {
15380 const char *directive = ASM_LONG;
15381
15382 #ifdef ASM_QUAD
15383 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15384 directive = ASM_QUAD;
15385 #else
15386 gcc_assert (!TARGET_64BIT);
15387 #endif
15388 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15389 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15390 fprintf (file, "%s%s%d-%s%d\n",
15391 directive, LPREFIX, value, LPREFIX, rel);
15392 else if (HAVE_AS_GOTOFF_IN_DATA)
15393 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15394 #if TARGET_MACHO
15395 else if (TARGET_MACHO)
15396 {
15397 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15398 machopic_output_function_base_name (file);
15399 putc ('\n', file);
15400 }
15401 #endif
15402 else
15403 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15404 GOT_SYMBOL_NAME, LPREFIX, value);
15405 }
15406 \f
15407 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15408 for the target. */
15409
15410 void
15411 ix86_expand_clear (rtx dest)
15412 {
15413 rtx tmp;
15414
15415 /* We play register width games, which are only valid after reload. */
15416 gcc_assert (reload_completed);
15417
15418 /* Avoid HImode and its attendant prefix byte. */
15419 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15420 dest = gen_rtx_REG (SImode, REGNO (dest));
15421 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15422
15423 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
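  /* The xor form clobbers the flags register, so the clobber has to be
     represented explicitly in the emitted pattern.  */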
15424 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15425 {
15426 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15427 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
15428 }
15429
15430 emit_insn (tmp);
15431 }
15432
15433 /* X is an unchanging MEM. If it is a constant pool reference, return
15434 the constant pool rtx, else NULL. */
15435
15436 rtx
15437 maybe_get_pool_constant (rtx x)
15438 {
15439 x = ix86_delegitimize_address (XEXP (x, 0));
15440
15441 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15442 return get_pool_constant (x);
15443
15444 return NULL_RTX;
15445 }
15446
15447 void
15448 ix86_expand_move (enum machine_mode mode, rtx operands[])
15449 {
15450 rtx op0, op1;
15451 enum tls_model model;
15452
15453 op0 = operands[0];
15454 op1 = operands[1];
15455
15456 if (GET_CODE (op1) == SYMBOL_REF)
15457 {
15458 model = SYMBOL_REF_TLS_MODEL (op1);
15459 if (model)
15460 {
15461 op1 = legitimize_tls_address (op1, model, true);
15462 op1 = force_operand (op1, op0);
15463 if (op1 == op0)
15464 return;
15465 }
15466 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15467 && SYMBOL_REF_DLLIMPORT_P (op1))
15468 op1 = legitimize_dllimport_symbol (op1, false);
15469 }
15470 else if (GET_CODE (op1) == CONST
15471 && GET_CODE (XEXP (op1, 0)) == PLUS
15472 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15473 {
15474 rtx addend = XEXP (XEXP (op1, 0), 1);
15475 rtx symbol = XEXP (XEXP (op1, 0), 0);
15476 rtx tmp = NULL;
15477
15478 model = SYMBOL_REF_TLS_MODEL (symbol);
15479 if (model)
15480 tmp = legitimize_tls_address (symbol, model, true);
15481 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15482 && SYMBOL_REF_DLLIMPORT_P (symbol))
15483 tmp = legitimize_dllimport_symbol (symbol, true);
15484
15485 if (tmp)
15486 {
15487 tmp = force_operand (tmp, NULL);
15488 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15489 op0, 1, OPTAB_DIRECT);
15490 if (tmp == op0)
15491 return;
15492 }
15493 }
15494
15495 if ((flag_pic || MACHOPIC_INDIRECT)
15496 && mode == Pmode && symbolic_operand (op1, Pmode))
15497 {
15498 if (TARGET_MACHO && !TARGET_64BIT)
15499 {
15500 #if TARGET_MACHO
15501 /* dynamic-no-pic */
15502 if (MACHOPIC_INDIRECT)
15503 {
15504 rtx temp = ((reload_in_progress
15505 || ((op0 && REG_P (op0))
15506 && mode == Pmode))
15507 ? op0 : gen_reg_rtx (Pmode));
15508 op1 = machopic_indirect_data_reference (op1, temp);
15509 if (MACHOPIC_PURE)
15510 op1 = machopic_legitimize_pic_address (op1, mode,
15511 temp == op1 ? 0 : temp);
15512 }
15513 if (op0 != op1 && GET_CODE (op0) != MEM)
15514 {
15515 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15516 emit_insn (insn);
15517 return;
15518 }
15519 if (GET_CODE (op0) == MEM)
15520 op1 = force_reg (Pmode, op1);
15521 else
15522 {
15523 rtx temp = op0;
15524 if (GET_CODE (temp) != REG)
15525 temp = gen_reg_rtx (Pmode);
15526 temp = legitimize_pic_address (op1, temp);
15527 if (temp == op0)
15528 return;
15529 op1 = temp;
15530 }
15531 /* dynamic-no-pic */
15532 #endif
15533 }
15534 else
15535 {
15536 if (MEM_P (op0))
15537 op1 = force_reg (Pmode, op1);
15538 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
15539 {
15540 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15541 op1 = legitimize_pic_address (op1, reg);
15542 if (op0 == op1)
15543 return;
15544 }
15545 }
15546 }
15547 else
15548 {
15549 if (MEM_P (op0)
15550 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15551 || !push_operand (op0, mode))
15552 && MEM_P (op1))
15553 op1 = force_reg (mode, op1);
15554
15555 if (push_operand (op0, mode)
15556 && ! general_no_elim_operand (op1, mode))
15557 op1 = copy_to_mode_reg (mode, op1);
15558
15559 /* Force large constants in 64-bit compilation into a register
15560 so that they get CSEed. */
15561 if (can_create_pseudo_p ()
15562 && (mode == DImode) && TARGET_64BIT
15563 && immediate_operand (op1, mode)
15564 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15565 && !register_operand (op0, mode)
15566 && optimize)
15567 op1 = copy_to_mode_reg (mode, op1);
15568
15569 if (can_create_pseudo_p ()
15570 && FLOAT_MODE_P (mode)
15571 && GET_CODE (op1) == CONST_DOUBLE)
15572 {
15573 /* If we are loading a floating point constant to a register,
15574 force the value to memory now, since we'll get better code
15575 out the back end. */
15576
15577 op1 = validize_mem (force_const_mem (mode, op1));
15578 if (!register_operand (op0, mode))
15579 {
15580 rtx temp = gen_reg_rtx (mode);
15581 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15582 emit_move_insn (op0, temp);
15583 return;
15584 }
15585 }
15586 }
15587
15588 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15589 }
15590
15591 void
15592 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15593 {
15594 rtx op0 = operands[0], op1 = operands[1];
15595 unsigned int align = GET_MODE_ALIGNMENT (mode);
15596
15597 /* Force constants other than zero into memory. We do not know how
15598 the instructions used to build constants modify the upper 64 bits
15599 of the register; once we have that information we may be able
15600 to handle some of them more efficiently. */
15601 if (can_create_pseudo_p ()
15602 && register_operand (op0, mode)
15603 && (CONSTANT_P (op1)
15604 || (GET_CODE (op1) == SUBREG
15605 && CONSTANT_P (SUBREG_REG (op1))))
15606 && !standard_sse_constant_p (op1))
15607 op1 = validize_mem (force_const_mem (mode, op1));
15608
15609 /* We need to check memory alignment for SSE modes, since attributes
15610 can make operands unaligned. */
15611 if (can_create_pseudo_p ()
15612 && SSE_REG_MODE_P (mode)
15613 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15614 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15615 {
15616 rtx tmp[2];
15617
15618 /* ix86_expand_vector_move_misalign() does not like constants ... */
15619 if (CONSTANT_P (op1)
15620 || (GET_CODE (op1) == SUBREG
15621 && CONSTANT_P (SUBREG_REG (op1))))
15622 op1 = validize_mem (force_const_mem (mode, op1));
15623
15624 /* ... nor both arguments in memory. */
15625 if (!register_operand (op0, mode)
15626 && !register_operand (op1, mode))
15627 op1 = force_reg (mode, op1);
15628
15629 tmp[0] = op0; tmp[1] = op1;
15630 ix86_expand_vector_move_misalign (mode, tmp);
15631 return;
15632 }
15633
15634 /* Make operand1 a register if it isn't already. */
15635 if (can_create_pseudo_p ()
15636 && !register_operand (op0, mode)
15637 && !register_operand (op1, mode))
15638 {
15639 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15640 return;
15641 }
15642
15643 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15644 }
15645
15646 /* Split 32-byte AVX unaligned load and store if needed. */
15647
15648 static void
15649 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15650 {
15651 rtx m;
15652 rtx (*extract) (rtx, rtx, rtx);
15653 rtx (*move_unaligned) (rtx, rtx);
15654 enum machine_mode mode;
15655
15656 switch (GET_MODE (op0))
15657 {
15658 default:
15659 gcc_unreachable ();
15660 case V32QImode:
15661 extract = gen_avx_vextractf128v32qi;
15662 move_unaligned = gen_avx_movdqu256;
15663 mode = V16QImode;
15664 break;
15665 case V8SFmode:
15666 extract = gen_avx_vextractf128v8sf;
15667 move_unaligned = gen_avx_movups256;
15668 mode = V4SFmode;
15669 break;
15670 case V4DFmode:
15671 extract = gen_avx_vextractf128v4df;
15672 move_unaligned = gen_avx_movupd256;
15673 mode = V2DFmode;
15674 break;
15675 }
15676
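  /* Split a misaligned 256-bit load into two 128-bit loads recombined with
     a VEC_CONCAT, and a misaligned 256-bit store into two vextractf128
     stores; otherwise emit a single unaligned 256-bit move.  */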
15677 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15678 {
15679 rtx r = gen_reg_rtx (mode);
15680 m = adjust_address (op1, mode, 0);
15681 emit_move_insn (r, m);
15682 m = adjust_address (op1, mode, 16);
15683 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15684 emit_move_insn (op0, r);
15685 }
15686 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15687 {
15688 m = adjust_address (op0, mode, 0);
15689 emit_insn (extract (m, op1, const0_rtx));
15690 m = adjust_address (op0, mode, 16);
15691 emit_insn (extract (m, op1, const1_rtx));
15692 }
15693 else
15694 emit_insn (move_unaligned (op0, op1));
15695 }
15696
15697 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15698 straight to ix86_expand_vector_move. */
15699 /* Code generation for scalar reg-reg moves of single and double precision data:
15700 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
15701 movaps reg, reg
15702 else
15703 movss reg, reg
15704 if (x86_sse_partial_reg_dependency == true)
15705 movapd reg, reg
15706 else
15707 movsd reg, reg
15708
15709 Code generation for scalar loads of double precision data:
15710 if (x86_sse_split_regs == true)
15711 movlpd mem, reg (gas syntax)
15712 else
15713 movsd mem, reg
15714
15715 Code generation for unaligned packed loads of single precision data
15716 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15717 if (x86_sse_unaligned_move_optimal)
15718 movups mem, reg
15719
15720 if (x86_sse_partial_reg_dependency == true)
15721 {
15722 xorps reg, reg
15723 movlps mem, reg
15724 movhps mem+8, reg
15725 }
15726 else
15727 {
15728 movlps mem, reg
15729 movhps mem+8, reg
15730 }
15731
15732 Code generation for unaligned packed loads of double precision data
15733 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15734 if (x86_sse_unaligned_move_optimal)
15735 movupd mem, reg
15736
15737 if (x86_sse_split_regs == true)
15738 {
15739 movlpd mem, reg
15740 movhpd mem+8, reg
15741 }
15742 else
15743 {
15744 movsd mem, reg
15745 movhpd mem+8, reg
15746 }
15747 */
15748
15749 void
15750 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15751 {
15752 rtx op0, op1, m;
15753
15754 op0 = operands[0];
15755 op1 = operands[1];
15756
15757 if (TARGET_AVX)
15758 {
15759 switch (GET_MODE_CLASS (mode))
15760 {
15761 case MODE_VECTOR_INT:
15762 case MODE_INT:
15763 switch (GET_MODE_SIZE (mode))
15764 {
15765 case 16:
15766 /* Use movups if packed single insns are preferred by the tuning. */
15767 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15768 {
15769 op0 = gen_lowpart (V4SFmode, op0);
15770 op1 = gen_lowpart (V4SFmode, op1);
15771 emit_insn (gen_sse_movups (op0, op1));
15772 return;
15773 }
15774 op0 = gen_lowpart (V16QImode, op0);
15775 op1 = gen_lowpart (V16QImode, op1);
15776 emit_insn (gen_sse2_movdqu (op0, op1));
15777 break;
15778 case 32:
15779 op0 = gen_lowpart (V32QImode, op0);
15780 op1 = gen_lowpart (V32QImode, op1);
15781 ix86_avx256_split_vector_move_misalign (op0, op1);
15782 break;
15783 default:
15784 gcc_unreachable ();
15785 }
15786 break;
15787 case MODE_VECTOR_FLOAT:
15788 op0 = gen_lowpart (mode, op0);
15789 op1 = gen_lowpart (mode, op1);
15790
15791 switch (mode)
15792 {
15793 case V4SFmode:
15794 emit_insn (gen_sse_movups (op0, op1));
15795 break;
15796 case V8SFmode:
15797 ix86_avx256_split_vector_move_misalign (op0, op1);
15798 break;
15799 case V2DFmode:
15800 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15801 {
15802 op0 = gen_lowpart (V4SFmode, op0);
15803 op1 = gen_lowpart (V4SFmode, op1);
15804 emit_insn (gen_sse_movups (op0, op1));
15805 return;
15806 }
15807 emit_insn (gen_sse2_movupd (op0, op1));
15808 break;
15809 case V4DFmode:
15810 ix86_avx256_split_vector_move_misalign (op0, op1);
15811 break;
15812 default:
15813 gcc_unreachable ();
15814 }
15815 break;
15816
15817 default:
15818 gcc_unreachable ();
15819 }
15820
15821 return;
15822 }
15823
15824 if (MEM_P (op1))
15825 {
15826 /* If we're optimizing for size, movups is the smallest. */
15827 if (optimize_insn_for_size_p ()
15828 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15829 {
15830 op0 = gen_lowpart (V4SFmode, op0);
15831 op1 = gen_lowpart (V4SFmode, op1);
15832 emit_insn (gen_sse_movups (op0, op1));
15833 return;
15834 }
15835
15836 /* ??? If we have typed data, then it would appear that using
15837 movdqu is the only way to get unaligned data loaded with
15838 integer type. */
15839 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15840 {
15841 op0 = gen_lowpart (V16QImode, op0);
15842 op1 = gen_lowpart (V16QImode, op1);
15843 emit_insn (gen_sse2_movdqu (op0, op1));
15844 return;
15845 }
15846
15847 if (TARGET_SSE2 && mode == V2DFmode)
15848 {
15849 rtx zero;
15850
15851 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15852 {
15853 op0 = gen_lowpart (V2DFmode, op0);
15854 op1 = gen_lowpart (V2DFmode, op1);
15855 emit_insn (gen_sse2_movupd (op0, op1));
15856 return;
15857 }
15858
15859 /* When SSE registers are split into halves, we can avoid
15860 writing to the top half twice. */
15861 if (TARGET_SSE_SPLIT_REGS)
15862 {
15863 emit_clobber (op0);
15864 zero = op0;
15865 }
15866 else
15867 {
15868 /* ??? Not sure about the best option for the Intel chips.
15869 The following would seem to satisfy; the register is
15870 entirely cleared, breaking the dependency chain. We
15871 then store to the upper half, with a dependency depth
15872 of one. A rumor has it that Intel recommends two movsd
15873 followed by an unpacklpd, but this is unconfirmed. And
15874 given that the dependency depth of the unpacklpd would
15875 still be one, I'm not sure why this would be better. */
15876 zero = CONST0_RTX (V2DFmode);
15877 }
15878
15879 m = adjust_address (op1, DFmode, 0);
15880 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15881 m = adjust_address (op1, DFmode, 8);
15882 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15883 }
15884 else
15885 {
15886 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15887 {
15888 op0 = gen_lowpart (V4SFmode, op0);
15889 op1 = gen_lowpart (V4SFmode, op1);
15890 emit_insn (gen_sse_movups (op0, op1));
15891 return;
15892 }
15893
15894 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15895 emit_move_insn (op0, CONST0_RTX (mode));
15896 else
15897 emit_clobber (op0);
15898
15899 if (mode != V4SFmode)
15900 op0 = gen_lowpart (V4SFmode, op0);
15901 m = adjust_address (op1, V2SFmode, 0);
15902 emit_insn (gen_sse_loadlps (op0, op0, m));
15903 m = adjust_address (op1, V2SFmode, 8);
15904 emit_insn (gen_sse_loadhps (op0, op0, m));
15905 }
15906 }
15907 else if (MEM_P (op0))
15908 {
15909 /* If we're optimizing for size, movups is the smallest. */
15910 if (optimize_insn_for_size_p ()
15911 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15912 {
15913 op0 = gen_lowpart (V4SFmode, op0);
15914 op1 = gen_lowpart (V4SFmode, op1);
15915 emit_insn (gen_sse_movups (op0, op1));
15916 return;
15917 }
15918
15919 /* ??? Similar to above, only less clear because of the
15920 "typeless stores" issue. */
15921 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15922 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15923 {
15924 op0 = gen_lowpart (V16QImode, op0);
15925 op1 = gen_lowpart (V16QImode, op1);
15926 emit_insn (gen_sse2_movdqu (op0, op1));
15927 return;
15928 }
15929
15930 if (TARGET_SSE2 && mode == V2DFmode)
15931 {
15932 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15933 {
15934 op0 = gen_lowpart (V2DFmode, op0);
15935 op1 = gen_lowpart (V2DFmode, op1);
15936 emit_insn (gen_sse2_movupd (op0, op1));
15937 }
15938 else
15939 {
15940 m = adjust_address (op0, DFmode, 0);
15941 emit_insn (gen_sse2_storelpd (m, op1));
15942 m = adjust_address (op0, DFmode, 8);
15943 emit_insn (gen_sse2_storehpd (m, op1));
15944 }
15945 }
15946 else
15947 {
15948 if (mode != V4SFmode)
15949 op1 = gen_lowpart (V4SFmode, op1);
15950
15951 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15952 {
15953 op0 = gen_lowpart (V4SFmode, op0);
15954 emit_insn (gen_sse_movups (op0, op1));
15955 }
15956 else
15957 {
15958 m = adjust_address (op0, V2SFmode, 0);
15959 emit_insn (gen_sse_storelps (m, op1));
15960 m = adjust_address (op0, V2SFmode, 8);
15961 emit_insn (gen_sse_storehps (m, op1));
15962 }
15963 }
15964 }
15965 else
15966 gcc_unreachable ();
15967 }
15968
15969 /* Expand a push in MODE. This is some mode for which we do not support
15970 proper push instructions, at least from the registers that we expect
15971 the value to live in. */
15972
15973 void
15974 ix86_expand_push (enum machine_mode mode, rtx x)
15975 {
15976 rtx tmp;
15977
15978 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15979 GEN_INT (-GET_MODE_SIZE (mode)),
15980 stack_pointer_rtx, 1, OPTAB_DIRECT);
15981 if (tmp != stack_pointer_rtx)
15982 emit_move_insn (stack_pointer_rtx, tmp);
15983
15984 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15985
15986 /* When we push an operand onto the stack, it has to be aligned at
15987 least at the function argument boundary. However, since we don't
15988 have the argument type, we can't determine the actual argument
15989 boundary. */
15990 emit_move_insn (tmp, x);
15991 }
15992
15993 /* Helper function of ix86_fixup_binary_operands to canonicalize
15994 operand order. Returns true if the operands should be swapped. */
15995
15996 static bool
15997 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15998 rtx operands[])
15999 {
16000 rtx dst = operands[0];
16001 rtx src1 = operands[1];
16002 rtx src2 = operands[2];
16003
16004 /* If the operation is not commutative, we can't do anything. */
16005 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
16006 return false;
16007
16008 /* Highest priority is that src1 should match dst. */
16009 if (rtx_equal_p (dst, src1))
16010 return false;
16011 if (rtx_equal_p (dst, src2))
16012 return true;
16013
16014 /* Next highest priority is that immediate constants come second. */
16015 if (immediate_operand (src2, mode))
16016 return false;
16017 if (immediate_operand (src1, mode))
16018 return true;
16019
16020 /* Lowest priority is that memory references should come second. */
16021 if (MEM_P (src2))
16022 return false;
16023 if (MEM_P (src1))
16024 return true;
16025
16026 return false;
16027 }
16028
16029
16030 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16031 destination to use for the operation. If different from the true
16032 destination in operands[0], a copy operation will be required. */
16033
16034 rtx
16035 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
16036 rtx operands[])
16037 {
16038 rtx dst = operands[0];
16039 rtx src1 = operands[1];
16040 rtx src2 = operands[2];
16041
16042 /* Canonicalize operand order. */
16043 if (ix86_swap_binary_operands_p (code, mode, operands))
16044 {
16045 rtx temp;
16046
16047 /* It is invalid to swap operands of different modes. */
16048 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
16049
16050 temp = src1;
16051 src1 = src2;
16052 src2 = temp;
16053 }
16054
16055 /* Both source operands cannot be in memory. */
16056 if (MEM_P (src1) && MEM_P (src2))
16057 {
16058 /* Optimization: Only read from memory once. */
16059 if (rtx_equal_p (src1, src2))
16060 {
16061 src2 = force_reg (mode, src2);
16062 src1 = src2;
16063 }
16064 else
16065 src2 = force_reg (mode, src2);
16066 }
16067
16068 /* If the destination is memory, and we do not have matching source
16069 operands, do things in registers. */
16070 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16071 dst = gen_reg_rtx (mode);
16072
16073 /* Source 1 cannot be a constant. */
16074 if (CONSTANT_P (src1))
16075 src1 = force_reg (mode, src1);
16076
16077 /* Source 1 cannot be a non-matching memory. */
16078 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16079 src1 = force_reg (mode, src1);
16080
16081 operands[1] = src1;
16082 operands[2] = src2;
16083 return dst;
16084 }
16085
16086 /* Similarly, but assume that the destination has already been
16087 set up properly. */
16088
16089 void
16090 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
16091 enum machine_mode mode, rtx operands[])
16092 {
16093 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
16094 gcc_assert (dst == operands[0]);
16095 }
16096
16097 /* Attempt to expand a binary operator. Make the expansion closer to the
16098 actual machine than just general_operand, which would allow 3 separate
16099 memory references (one output, two input) in a single insn. */
16100
16101 void
16102 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
16103 rtx operands[])
16104 {
16105 rtx src1, src2, dst, op, clob;
16106
16107 dst = ix86_fixup_binary_operands (code, mode, operands);
16108 src1 = operands[1];
16109 src2 = operands[2];
16110
16111 /* Emit the instruction. */
16112
16113 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16114 if (reload_in_progress)
16115 {
16116 /* Reload doesn't know about the flags register, and doesn't know that
16117 it doesn't want to clobber it. We can only do this with PLUS. */
16118 gcc_assert (code == PLUS);
16119 emit_insn (op);
16120 }
16121 else if (reload_completed
16122 && code == PLUS
16123 && !rtx_equal_p (dst, src1))
16124 {
16125 /* This is going to be an LEA; avoid splitting it later. */
16126 emit_insn (op);
16127 }
16128 else
16129 {
16130 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16131 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16132 }
16133
16134 /* Fix up the destination if needed. */
16135 if (dst != operands[0])
16136 emit_move_insn (operands[0], dst);
16137 }
16138
16139 /* Return TRUE or FALSE depending on whether the binary operator meets the
16140 appropriate constraints. */
16141
16142 bool
16143 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16144 rtx operands[3])
16145 {
16146 rtx dst = operands[0];
16147 rtx src1 = operands[1];
16148 rtx src2 = operands[2];
16149
16150 /* Both source operands cannot be in memory. */
16151 if (MEM_P (src1) && MEM_P (src2))
16152 return false;
16153
16154 /* Canonicalize operand order for commutative operators. */
16155 if (ix86_swap_binary_operands_p (code, mode, operands))
16156 {
16157 rtx temp = src1;
16158 src1 = src2;
16159 src2 = temp;
16160 }
16161
16162 /* If the destination is memory, we must have a matching source operand. */
16163 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16164 return false;
16165
16166 /* Source 1 cannot be a constant. */
16167 if (CONSTANT_P (src1))
16168 return false;
16169
16170 /* Source 1 cannot be a non-matching memory. */
16171 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16172 {
16173 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16174 return (code == AND
16175 && (mode == HImode
16176 || mode == SImode
16177 || (TARGET_64BIT && mode == DImode))
16178 && CONST_INT_P (src2)
16179 && (INTVAL (src2) == 0xff
16180 || INTVAL (src2) == 0xffff));
16181 }
16182
16183 return true;
16184 }
16185
16186 /* Attempt to expand a unary operator. Make the expansion closer to the
16187 actual machine than just general_operand, which would allow 2 separate
16188 memory references (one output, one input) in a single insn. */
16189
16190 void
16191 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16192 rtx operands[])
16193 {
16194 int matching_memory;
16195 rtx src, dst, op, clob;
16196
16197 dst = operands[0];
16198 src = operands[1];
16199
16200 /* If the destination is memory, and we do not have matching source
16201 operands, do things in registers. */
16202 matching_memory = 0;
16203 if (MEM_P (dst))
16204 {
16205 if (rtx_equal_p (dst, src))
16206 matching_memory = 1;
16207 else
16208 dst = gen_reg_rtx (mode);
16209 }
16210
16211 /* When source operand is memory, destination must match. */
16212 if (MEM_P (src) && !matching_memory)
16213 src = force_reg (mode, src);
16214
16215 /* Emit the instruction. */
16216
16217 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16218 if (reload_in_progress || code == NOT)
16219 {
16220 /* Reload doesn't know about the flags register, and doesn't know that
16221 it doesn't want to clobber it. */
16222 gcc_assert (code == NOT);
16223 emit_insn (op);
16224 }
16225 else
16226 {
16227 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16228 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16229 }
16230
16231 /* Fix up the destination if needed. */
16232 if (dst != operands[0])
16233 emit_move_insn (operands[0], dst);
16234 }
16235
16236 /* Split 32-bit/64-bit divmod with 8-bit unsigned divmod if the
16237 dividend and the divisor are within the range [0, 255]. */
16238
16239 void
16240 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16241 bool signed_p)
16242 {
16243 rtx end_label, qimode_label;
16244 rtx insn, div, mod;
16245 rtx scratch, tmp0, tmp1, tmp2;
16246 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16247 rtx (*gen_zero_extend) (rtx, rtx);
16248 rtx (*gen_test_ccno_1) (rtx, rtx);
16249
16250 switch (mode)
16251 {
16252 case SImode:
16253 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16254 gen_test_ccno_1 = gen_testsi_ccno_1;
16255 gen_zero_extend = gen_zero_extendqisi2;
16256 break;
16257 case DImode:
16258 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16259 gen_test_ccno_1 = gen_testdi_ccno_1;
16260 gen_zero_extend = gen_zero_extendqidi2;
16261 break;
16262 default:
16263 gcc_unreachable ();
16264 }
16265
16266 end_label = gen_label_rtx ();
16267 qimode_label = gen_label_rtx ();
16268
16269 scratch = gen_reg_rtx (mode);
16270
16271   /* Use 8bit unsigned divmod if dividend and divisor are within
16272 the range [0-255]. */
16273 emit_move_insn (scratch, operands[2]);
16274 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16275 scratch, 1, OPTAB_DIRECT);
16276 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16277 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16278 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16279 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16280 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16281 pc_rtx);
16282 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16283 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16284 JUMP_LABEL (insn) = qimode_label;
16285
16286   /* Generate original signed/unsigned divmod.  */
16287 div = gen_divmod4_1 (operands[0], operands[1],
16288 operands[2], operands[3]);
16289 emit_insn (div);
16290
16291 /* Branch to the end. */
16292 emit_jump_insn (gen_jump (end_label));
16293 emit_barrier ();
16294
16295 /* Generate 8bit unsigned divide. */
16296 emit_label (qimode_label);
16297 /* Don't use operands[0] for result of 8bit divide since not all
16298 registers support QImode ZERO_EXTRACT. */
16299 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16300 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16301 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16302 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16303
16304 if (signed_p)
16305 {
16306 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16307 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16308 }
16309 else
16310 {
16311 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16312 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16313 }
16314
16315 /* Extract remainder from AH. */
16316 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16317 if (REG_P (operands[1]))
16318 insn = emit_move_insn (operands[1], tmp1);
16319 else
16320 {
16321 /* Need a new scratch register since the old one has result
16322 of 8bit divide. */
16323 scratch = gen_reg_rtx (mode);
16324 emit_move_insn (scratch, tmp1);
16325 insn = emit_move_insn (operands[1], scratch);
16326 }
16327 set_unique_reg_note (insn, REG_EQUAL, mod);
16328
16329 /* Zero extend quotient from AL. */
16330 tmp1 = gen_lowpart (QImode, tmp0);
16331 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16332 set_unique_reg_note (insn, REG_EQUAL, div);
16333
16334 emit_label (end_label);
16335 }
16336
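/* Roughly, the expansion above produces control flow of this shape
   (operand names and labels purely illustrative):

       mov     op2, scratch
       or      op3, scratch            ; scratch = op2 | op3
       test    $-0x100, scratch        ; any bit above the low 8 set?
       je      .Lqimode
       div/idiv ...                    ; full-width divide
       jmp     .Lend
   .Lqimode:
       divb    ...                     ; 8bit unsigned divide of AX
                                       ; remainder in AH, quotient in AL
   .Lend:                                                               */
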
16337 #define LEA_SEARCH_THRESHOLD 12
16338
16339 /* Search backward for non-agu definition of register number REGNO1
16340 or register number REGNO2 in INSN's basic block until
16341 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16342 2. Reach BB boundary, or
16343 3. Reach agu definition.
16344 Returns the distance between the non-agu definition point and INSN.
16345 If no definition point, returns -1. */
16346
16347 static int
16348 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16349 rtx insn)
16350 {
16351 basic_block bb = BLOCK_FOR_INSN (insn);
16352 int distance = 0;
16353 df_ref *def_rec;
16354 enum attr_type insn_type;
16355
16356 if (insn != BB_HEAD (bb))
16357 {
16358 rtx prev = PREV_INSN (insn);
16359 while (prev && distance < LEA_SEARCH_THRESHOLD)
16360 {
16361 if (NONDEBUG_INSN_P (prev))
16362 {
16363 distance++;
16364 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16365 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16366 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16367 && (regno1 == DF_REF_REGNO (*def_rec)
16368 || regno2 == DF_REF_REGNO (*def_rec)))
16369 {
16370 insn_type = get_attr_type (prev);
16371 if (insn_type != TYPE_LEA)
16372 goto done;
16373 }
16374 }
16375 if (prev == BB_HEAD (bb))
16376 break;
16377 prev = PREV_INSN (prev);
16378 }
16379 }
16380
16381 if (distance < LEA_SEARCH_THRESHOLD)
16382 {
16383 edge e;
16384 edge_iterator ei;
16385 bool simple_loop = false;
16386
16387 FOR_EACH_EDGE (e, ei, bb->preds)
16388 if (e->src == bb)
16389 {
16390 simple_loop = true;
16391 break;
16392 }
16393
16394 if (simple_loop)
16395 {
16396 rtx prev = BB_END (bb);
16397 while (prev
16398 && prev != insn
16399 && distance < LEA_SEARCH_THRESHOLD)
16400 {
16401 if (NONDEBUG_INSN_P (prev))
16402 {
16403 distance++;
16404 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16405 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16406 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16407 && (regno1 == DF_REF_REGNO (*def_rec)
16408 || regno2 == DF_REF_REGNO (*def_rec)))
16409 {
16410 insn_type = get_attr_type (prev);
16411 if (insn_type != TYPE_LEA)
16412 goto done;
16413 }
16414 }
16415 prev = PREV_INSN (prev);
16416 }
16417 }
16418 }
16419
16420 distance = -1;
16421
16422 done:
16423 /* get_attr_type may modify recog data. We want to make sure
16424 that recog data is valid for instruction INSN, on which
16425 distance_non_agu_define is called. INSN is unchanged here. */
16426 extract_insn_cached (insn);
16427 return distance;
16428 }
16429
16430 /* Return the distance between INSN and the next insn that uses
16431    register number REGNO0 in a memory address.  Return -1 if no such
16432    use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set.  */
16433
16434 static int
16435 distance_agu_use (unsigned int regno0, rtx insn)
16436 {
16437 basic_block bb = BLOCK_FOR_INSN (insn);
16438 int distance = 0;
16439 df_ref *def_rec;
16440 df_ref *use_rec;
16441
16442 if (insn != BB_END (bb))
16443 {
16444 rtx next = NEXT_INSN (insn);
16445 while (next && distance < LEA_SEARCH_THRESHOLD)
16446 {
16447 if (NONDEBUG_INSN_P (next))
16448 {
16449 distance++;
16450
16451 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16452 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16453 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16454 && regno0 == DF_REF_REGNO (*use_rec))
16455 {
16456 /* Return DISTANCE if OP0 is used in memory
16457 address in NEXT. */
16458 return distance;
16459 }
16460
16461 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16462 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16463 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16464 && regno0 == DF_REF_REGNO (*def_rec))
16465 {
16466 /* Return -1 if OP0 is set in NEXT. */
16467 return -1;
16468 }
16469 }
16470 if (next == BB_END (bb))
16471 break;
16472 next = NEXT_INSN (next);
16473 }
16474 }
16475
16476 if (distance < LEA_SEARCH_THRESHOLD)
16477 {
16478 edge e;
16479 edge_iterator ei;
16480 bool simple_loop = false;
16481
16482 FOR_EACH_EDGE (e, ei, bb->succs)
16483 if (e->dest == bb)
16484 {
16485 simple_loop = true;
16486 break;
16487 }
16488
16489 if (simple_loop)
16490 {
16491 rtx next = BB_HEAD (bb);
16492 while (next
16493 && next != insn
16494 && distance < LEA_SEARCH_THRESHOLD)
16495 {
16496 if (NONDEBUG_INSN_P (next))
16497 {
16498 distance++;
16499
16500 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16501 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16502 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16503 && regno0 == DF_REF_REGNO (*use_rec))
16504 {
16505 /* Return DISTANCE if OP0 is used in memory
16506 address in NEXT. */
16507 return distance;
16508 }
16509
16510 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16511 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16512 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16513 && regno0 == DF_REF_REGNO (*def_rec))
16514 {
16515 /* Return -1 if OP0 is set in NEXT. */
16516 return -1;
16517 }
16518
16519 }
16520 next = NEXT_INSN (next);
16521 }
16522 }
16523 }
16524
16525 return -1;
16526 }
16527
16528 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16529    there is a dilemma of choosing between LEA and ADD.
16530    Negative value: ADD is preferred over LEA
16531    Zero: Neutral
16532    Positive value: LEA is preferred over ADD  */
16533 #define IX86_LEA_PRIORITY 2
16534
16535 /* Return true if it is ok to optimize an ADD operation to an LEA
16536    operation to avoid flag register consumption.  For most processors,
16537    ADD is faster than LEA.  For processors like ATOM, if the
16538    destination register of the LEA holds an actual address which will be
16539    used soon, LEA is better; otherwise ADD is better.  */
16540
16541 bool
16542 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16543 {
16544 unsigned int regno0 = true_regnum (operands[0]);
16545 unsigned int regno1 = true_regnum (operands[1]);
16546 unsigned int regno2 = true_regnum (operands[2]);
16547
16548   /* If a = b + c, (a!=b && a!=c), we must use the lea form.  */
16549 if (regno0 != regno1 && regno0 != regno2)
16550 return true;
16551
16552 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16553 return false;
16554 else
16555 {
16556 int dist_define, dist_use;
16557
16558 /* Return false if REGNO0 isn't used in memory address. */
16559 dist_use = distance_agu_use (regno0, insn);
16560 if (dist_use <= 0)
16561 return false;
16562
16563 dist_define = distance_non_agu_define (regno1, regno2, insn);
16564 if (dist_define <= 0)
16565 return true;
16566
16567 /* If this insn has both backward non-agu dependence and forward
16568 	 agu dependence, the one with the shorter distance takes effect.  */
16569 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
16570 return false;
16571
16572 return true;
16573 }
16574 }
16575
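/* As an illustration of the heuristic above, with IX86_LEA_PRIORITY == 2:
   if the result feeds a memory address 3 insns later (dist_use == 3) and
   the inputs were defined by non-AGU insns 2 insns earlier
   (dist_define == 2), then dist_define + 2 == 4 is not smaller than
   dist_use, so the LEA form is chosen; with dist_use == 6 instead,
   4 < 6 holds and the ADD form is kept.  */
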
16576 /* Return true if destination reg of SET_BODY is shift count of
16577 USE_BODY. */
16578
16579 static bool
16580 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16581 {
16582 rtx set_dest;
16583 rtx shift_rtx;
16584 int i;
16585
16586 /* Retrieve destination of SET_BODY. */
16587 switch (GET_CODE (set_body))
16588 {
16589 case SET:
16590 set_dest = SET_DEST (set_body);
16591 if (!set_dest || !REG_P (set_dest))
16592 return false;
16593 break;
16594 case PARALLEL:
16595 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16596 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16597 use_body))
16598 return true;
16599 default:
16600 return false;
16601 break;
16602 }
16603
16604 /* Retrieve shift count of USE_BODY. */
16605 switch (GET_CODE (use_body))
16606 {
16607 case SET:
16608 shift_rtx = XEXP (use_body, 1);
16609 break;
16610 case PARALLEL:
16611 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16612 if (ix86_dep_by_shift_count_body (set_body,
16613 XVECEXP (use_body, 0, i)))
16614 return true;
16615 default:
16616 return false;
16617 break;
16618 }
16619
16620 if (shift_rtx
16621 && (GET_CODE (shift_rtx) == ASHIFT
16622 || GET_CODE (shift_rtx) == LSHIFTRT
16623 || GET_CODE (shift_rtx) == ASHIFTRT
16624 || GET_CODE (shift_rtx) == ROTATE
16625 || GET_CODE (shift_rtx) == ROTATERT))
16626 {
16627 rtx shift_count = XEXP (shift_rtx, 1);
16628
16629 /* Return true if shift count is dest of SET_BODY. */
16630 if (REG_P (shift_count)
16631 && true_regnum (set_dest) == true_regnum (shift_count))
16632 return true;
16633 }
16634
16635 return false;
16636 }
16637
16638 /* Return true if destination reg of SET_INSN is shift count of
16639 USE_INSN. */
16640
16641 bool
16642 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16643 {
16644 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16645 PATTERN (use_insn));
16646 }
16647
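/* For example, with insns of these sketched shapes

     (set (reg:QI cx) ...)                                  ; SET_INSN
     (set (reg:SI ax)
          (ashift:SI (reg:SI ax) (reg:QI cx)))              ; USE_INSN

   the destination of the first insn is the shift count of the second,
   so this returns true.  */
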
16648 /* Return TRUE or FALSE depending on whether the unary operator meets the
16649 appropriate constraints. */
16650
16651 bool
16652 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16653 enum machine_mode mode ATTRIBUTE_UNUSED,
16654 rtx operands[2] ATTRIBUTE_UNUSED)
16655 {
16656 /* If one of operands is memory, source and destination must match. */
16657 if ((MEM_P (operands[0])
16658 || MEM_P (operands[1]))
16659 && ! rtx_equal_p (operands[0], operands[1]))
16660 return false;
16661 return true;
16662 }
16663
16664 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16665 are ok, keeping in mind the possible movddup alternative. */
16666
16667 bool
16668 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16669 {
16670 if (MEM_P (operands[0]))
16671 return rtx_equal_p (operands[0], operands[1 + high]);
16672 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16673 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16674 return true;
16675 }
16676
16677 /* Post-reload splitter for converting an SF or DFmode value in an
16678 SSE register into an unsigned SImode. */
16679
16680 void
16681 ix86_split_convert_uns_si_sse (rtx operands[])
16682 {
16683 enum machine_mode vecmode;
16684 rtx value, large, zero_or_two31, input, two31, x;
16685
16686 large = operands[1];
16687 zero_or_two31 = operands[2];
16688 input = operands[3];
16689 two31 = operands[4];
16690 vecmode = GET_MODE (large);
16691 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16692
16693 /* Load up the value into the low element. We must ensure that the other
16694 elements are valid floats -- zero is the easiest such value. */
16695 if (MEM_P (input))
16696 {
16697 if (vecmode == V4SFmode)
16698 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16699 else
16700 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16701 }
16702 else
16703 {
16704 input = gen_rtx_REG (vecmode, REGNO (input));
16705 emit_move_insn (value, CONST0_RTX (vecmode));
16706 if (vecmode == V4SFmode)
16707 emit_insn (gen_sse_movss (value, value, input));
16708 else
16709 emit_insn (gen_sse2_movsd (value, value, input));
16710 }
16711
16712 emit_move_insn (large, two31);
16713 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16714
16715 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16716 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16717
16718 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16719 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16720
16721 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16722 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16723
16724 large = gen_rtx_REG (V4SImode, REGNO (large));
16725 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16726
16727 x = gen_rtx_REG (V4SImode, REGNO (value));
16728 if (vecmode == V4SFmode)
16729 emit_insn (gen_sse2_cvttps2dq (x, value));
16730 else
16731 emit_insn (gen_sse2_cvttpd2dq (x, value));
16732 value = x;
16733
16734 emit_insn (gen_xorv4si3 (value, value, large));
16735 }
16736
16737 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16738 Expects the 64-bit DImode to be supplied in a pair of integral
16739 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16740 -mfpmath=sse, !optimize_size only. */
16741
16742 void
16743 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16744 {
16745 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16746 rtx int_xmm, fp_xmm;
16747 rtx biases, exponents;
16748 rtx x;
16749
16750 int_xmm = gen_reg_rtx (V4SImode);
16751 if (TARGET_INTER_UNIT_MOVES)
16752 emit_insn (gen_movdi_to_sse (int_xmm, input));
16753 else if (TARGET_SSE_SPLIT_REGS)
16754 {
16755 emit_clobber (int_xmm);
16756 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16757 }
16758 else
16759 {
16760 x = gen_reg_rtx (V2DImode);
16761 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16762 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16763 }
16764
16765 x = gen_rtx_CONST_VECTOR (V4SImode,
16766 gen_rtvec (4, GEN_INT (0x43300000UL),
16767 GEN_INT (0x45300000UL),
16768 const0_rtx, const0_rtx));
16769 exponents = validize_mem (force_const_mem (V4SImode, x));
16770
16771 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16772 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16773
16774 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16775 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16776 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16777 (0x1.0p84 + double(fp_value_hi_xmm)).
16778 Note these exponents differ by 32. */
16779
16780 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16781
16782 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16783 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16784 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16785 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16786 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16787 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16788 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16789 biases = validize_mem (force_const_mem (V2DFmode, biases));
16790 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16791
16792 /* Add the upper and lower DFmode values together. */
16793 if (TARGET_SSE3)
16794 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16795 else
16796 {
16797 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16798 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16799 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16800 }
16801
16802 ix86_expand_vector_extract (false, target, fp_xmm, 0);
16803 }
16804
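/* Numerically, the sequence above computes
     ((2^52 + lo) - 2^52) + ((2^84 + hi * 2^32) - 2^84)
   which, after the horizontal add, is lo + hi * 2^32 (rounded once when
   it does not fit in 53 bits), i.e. the original unsigned 64-bit value
   as a double.  For instance hi == 1, lo == 2 gives lanes 2^52 + 2 and
   2^84 + 2^32, and the final result 4294967298.0.  */
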
16805 /* Not used, but eases macroization of patterns. */
16806 void
16807 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16808 rtx input ATTRIBUTE_UNUSED)
16809 {
16810 gcc_unreachable ();
16811 }
16812
16813 /* Convert an unsigned SImode value into a DFmode. Only currently used
16814 for SSE, but applicable anywhere. */
16815
16816 void
16817 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16818 {
16819 REAL_VALUE_TYPE TWO31r;
16820 rtx x, fp;
16821
16822 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16823 NULL, 1, OPTAB_DIRECT);
16824
16825 fp = gen_reg_rtx (DFmode);
16826 emit_insn (gen_floatsidf2 (fp, x));
16827
16828 real_ldexp (&TWO31r, &dconst1, 31);
16829 x = const_double_from_real_value (TWO31r, DFmode);
16830
16831 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16832 if (x != target)
16833 emit_move_insn (target, x);
16834 }
16835
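/* The expansion above relies on the identity
     (double) U  ==  (double) (int) (U + 0x80000000)  +  2147483648.0
   where the addition is modulo 2^32 and simply flips the sign bit.
   For example U == 3000000000 maps to the signed value 852352352, and
   852352352.0 + 2147483648.0 == 3000000000.0; U == 5 maps to
   -2147483643, and -2147483643.0 + 2147483648.0 == 5.0.  */
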
16836 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16837 32-bit mode; otherwise we have a direct convert instruction. */
16838
16839 void
16840 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16841 {
16842 REAL_VALUE_TYPE TWO32r;
16843 rtx fp_lo, fp_hi, x;
16844
16845 fp_lo = gen_reg_rtx (DFmode);
16846 fp_hi = gen_reg_rtx (DFmode);
16847
16848 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16849
16850 real_ldexp (&TWO32r, &dconst1, 32);
16851 x = const_double_from_real_value (TWO32r, DFmode);
16852 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16853
16854 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16855
16856 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16857 0, OPTAB_DIRECT);
16858 if (x != target)
16859 emit_move_insn (target, x);
16860 }
16861
16862 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16863 For x86_32, -mfpmath=sse, !optimize_size only. */
16864 void
16865 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16866 {
16867 REAL_VALUE_TYPE ONE16r;
16868 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16869
16870 real_ldexp (&ONE16r, &dconst1, 16);
16871 x = const_double_from_real_value (ONE16r, SFmode);
16872 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16873 NULL, 0, OPTAB_DIRECT);
16874 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16875 NULL, 0, OPTAB_DIRECT);
16876 fp_hi = gen_reg_rtx (SFmode);
16877 fp_lo = gen_reg_rtx (SFmode);
16878 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16879 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16880 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16881 0, OPTAB_DIRECT);
16882 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16883 0, OPTAB_DIRECT);
16884 if (!rtx_equal_p (target, fp_hi))
16885 emit_move_insn (target, fp_hi);
16886 }
16887
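/* I.e. the 32-bit input U is rebuilt as
     (float) (U >> 16) * 65536.0f + (float) (U & 0xffff);
   both halves and the scaled high part are exactly representable in
   SFmode, so the only rounding happens in the final addition.  */
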
16888 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16889 then replicate the value for all elements of the vector
16890 register. */
16891
16892 rtx
16893 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16894 {
16895 rtvec v;
16896 switch (mode)
16897 {
16898 case V4SImode:
16899 gcc_assert (vect);
16900 v = gen_rtvec (4, value, value, value, value);
16901 return gen_rtx_CONST_VECTOR (V4SImode, v);
16902
16903 case V2DImode:
16904 gcc_assert (vect);
16905 v = gen_rtvec (2, value, value);
16906 return gen_rtx_CONST_VECTOR (V2DImode, v);
16907
16908 case V8SFmode:
16909 if (vect)
16910 v = gen_rtvec (8, value, value, value, value,
16911 value, value, value, value);
16912 else
16913 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16914 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16915 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16916 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16917 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16918
16919 case V4SFmode:
16920 if (vect)
16921 v = gen_rtvec (4, value, value, value, value);
16922 else
16923 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16924 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16925 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16926
16927 case V4DFmode:
16928 if (vect)
16929 v = gen_rtvec (4, value, value, value, value);
16930 else
16931 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16932 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16933 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16934
16935 case V2DFmode:
16936 if (vect)
16937 v = gen_rtvec (2, value, value);
16938 else
16939 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16940 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16941
16942 default:
16943 gcc_unreachable ();
16944 }
16945 }
16946
16947 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16948 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16949 for an SSE register. If VECT is true, then replicate the mask for
16950 all elements of the vector register. If INVERT is true, then create
16951 a mask excluding the sign bit. */
16952
16953 rtx
16954 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16955 {
16956 enum machine_mode vec_mode, imode;
16957 HOST_WIDE_INT hi, lo;
16958 int shift = 63;
16959 rtx v;
16960 rtx mask;
16961
16962 /* Find the sign bit, sign extended to 2*HWI. */
16963 switch (mode)
16964 {
16965 case V4SImode:
16966 case V8SFmode:
16967 case V4SFmode:
16968 vec_mode = mode;
16969 mode = GET_MODE_INNER (mode);
16970 imode = SImode;
16971 lo = 0x80000000, hi = lo < 0;
16972 break;
16973
16974 case V2DImode:
16975 case V4DFmode:
16976 case V2DFmode:
16977 vec_mode = mode;
16978 mode = GET_MODE_INNER (mode);
16979 imode = DImode;
16980 if (HOST_BITS_PER_WIDE_INT >= 64)
16981 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16982 else
16983 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16984 break;
16985
16986 case TImode:
16987 case TFmode:
16988 vec_mode = VOIDmode;
16989 if (HOST_BITS_PER_WIDE_INT >= 64)
16990 {
16991 imode = TImode;
16992 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16993 }
16994 else
16995 {
16996 rtvec vec;
16997
16998 imode = DImode;
16999 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17000
17001 if (invert)
17002 {
17003 lo = ~lo, hi = ~hi;
17004 v = constm1_rtx;
17005 }
17006 else
17007 v = const0_rtx;
17008
17009 mask = immed_double_const (lo, hi, imode);
17010
17011 vec = gen_rtvec (2, v, mask);
17012 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
17013 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17014
17015 return v;
17016 }
17017 break;
17018
17019 default:
17020 gcc_unreachable ();
17021 }
17022
17023 if (invert)
17024 lo = ~lo, hi = ~hi;
17025
17026 /* Force this value into the low part of a fp vector constant. */
17027 mask = immed_double_const (lo, hi, imode);
17028 mask = gen_lowpart (mode, mask);
17029
17030 if (vec_mode == VOIDmode)
17031 return force_reg (mode, mask);
17032
17033 v = ix86_build_const_vector (vec_mode, vect, mask);
17034 return force_reg (vec_mode, v);
17035 }
17036
17037 /* Generate code for floating point ABS or NEG. */
17038
17039 void
17040 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17041 rtx operands[])
17042 {
17043 rtx mask, set, dst, src;
17044 bool use_sse = false;
17045 bool vector_mode = VECTOR_MODE_P (mode);
17046 enum machine_mode vmode = mode;
17047
17048 if (vector_mode)
17049 use_sse = true;
17050 else if (mode == TFmode)
17051 use_sse = true;
17052 else if (TARGET_SSE_MATH)
17053 {
17054 use_sse = SSE_FLOAT_MODE_P (mode);
17055 if (mode == SFmode)
17056 vmode = V4SFmode;
17057 else if (mode == DFmode)
17058 vmode = V2DFmode;
17059 }
17060
17061 /* NEG and ABS performed with SSE use bitwise mask operations.
17062 Create the appropriate mask now. */
17063 if (use_sse)
17064 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17065 else
17066 mask = NULL_RTX;
17067
17068 dst = operands[0];
17069 src = operands[1];
17070
17071 set = gen_rtx_fmt_e (code, mode, src);
17072 set = gen_rtx_SET (VOIDmode, dst, set);
17073
17074 if (mask)
17075 {
17076 rtx use, clob;
17077 rtvec par;
17078
17079 use = gen_rtx_USE (VOIDmode, mask);
17080 if (vector_mode)
17081 par = gen_rtvec (2, set, use);
17082 else
17083 {
17084 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17085 par = gen_rtvec (3, set, use, clob);
17086 }
17087 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
17088 }
17089 else
17090 emit_insn (set);
17091 }
17092
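/* With the masks built above, both operations become a single bitwise
   insn on an SSE register: NEG is an XOR with the sign-bit mask (e.g.
   0x8000000000000000 per DFmode element) and ABS is an AND with the
   inverted mask (0x7fffffffffffffff), so no FP exception can be
   raised.  */
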
17093 /* Expand a copysign operation. Special case operand 0 being a constant. */
17094
17095 void
17096 ix86_expand_copysign (rtx operands[])
17097 {
17098 enum machine_mode mode, vmode;
17099 rtx dest, op0, op1, mask, nmask;
17100
17101 dest = operands[0];
17102 op0 = operands[1];
17103 op1 = operands[2];
17104
17105 mode = GET_MODE (dest);
17106
17107 if (mode == SFmode)
17108 vmode = V4SFmode;
17109 else if (mode == DFmode)
17110 vmode = V2DFmode;
17111 else
17112 vmode = mode;
17113
17114 if (GET_CODE (op0) == CONST_DOUBLE)
17115 {
17116 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17117
17118 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17119 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17120
17121 if (mode == SFmode || mode == DFmode)
17122 {
17123 if (op0 == CONST0_RTX (mode))
17124 op0 = CONST0_RTX (vmode);
17125 else
17126 {
17127 rtx v = ix86_build_const_vector (vmode, false, op0);
17128
17129 op0 = force_reg (vmode, v);
17130 }
17131 }
17132 else if (op0 != CONST0_RTX (mode))
17133 op0 = force_reg (mode, op0);
17134
17135 mask = ix86_build_signbit_mask (vmode, 0, 0);
17136
17137 if (mode == SFmode)
17138 copysign_insn = gen_copysignsf3_const;
17139 else if (mode == DFmode)
17140 copysign_insn = gen_copysigndf3_const;
17141 else
17142 copysign_insn = gen_copysigntf3_const;
17143
17144 emit_insn (copysign_insn (dest, op0, op1, mask));
17145 }
17146 else
17147 {
17148 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17149
17150 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17151 mask = ix86_build_signbit_mask (vmode, 0, 0);
17152
17153 if (mode == SFmode)
17154 copysign_insn = gen_copysignsf3_var;
17155 else if (mode == DFmode)
17156 copysign_insn = gen_copysigndf3_var;
17157 else
17158 copysign_insn = gen_copysigntf3_var;
17159
17160 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17161 }
17162 }
17163
17164 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17165 be a constant, and so has already been expanded into a vector constant. */
17166
17167 void
17168 ix86_split_copysign_const (rtx operands[])
17169 {
17170 enum machine_mode mode, vmode;
17171 rtx dest, op0, mask, x;
17172
17173 dest = operands[0];
17174 op0 = operands[1];
17175 mask = operands[3];
17176
17177 mode = GET_MODE (dest);
17178 vmode = GET_MODE (mask);
17179
17180 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17181 x = gen_rtx_AND (vmode, dest, mask);
17182 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17183
17184 if (op0 != CONST0_RTX (vmode))
17185 {
17186 x = gen_rtx_IOR (vmode, dest, op0);
17187 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17188 }
17189 }
17190
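/* In other words, for a known-nonnegative constant C the result is
   computed as
     copysign (C, Y)  =  (Y & SIGN_MASK) | C
   with one AND and (unless C is +0.0) one IOR.  The variable case below
   additionally needs  X & ~SIGN_MASK  to strip X's own sign bit, hence
   the two masks.  */
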
17191 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17192 so we have to do two masks. */
17193
17194 void
17195 ix86_split_copysign_var (rtx operands[])
17196 {
17197 enum machine_mode mode, vmode;
17198 rtx dest, scratch, op0, op1, mask, nmask, x;
17199
17200 dest = operands[0];
17201 scratch = operands[1];
17202 op0 = operands[2];
17203 op1 = operands[3];
17204 nmask = operands[4];
17205 mask = operands[5];
17206
17207 mode = GET_MODE (dest);
17208 vmode = GET_MODE (mask);
17209
17210 if (rtx_equal_p (op0, op1))
17211 {
17212 /* Shouldn't happen often (it's useless, obviously), but when it does
17213 we'd generate incorrect code if we continue below. */
17214 emit_move_insn (dest, op0);
17215 return;
17216 }
17217
17218 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17219 {
17220 gcc_assert (REGNO (op1) == REGNO (scratch));
17221
17222 x = gen_rtx_AND (vmode, scratch, mask);
17223 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17224
17225 dest = mask;
17226 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17227 x = gen_rtx_NOT (vmode, dest);
17228 x = gen_rtx_AND (vmode, x, op0);
17229 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17230 }
17231 else
17232 {
17233 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17234 {
17235 x = gen_rtx_AND (vmode, scratch, mask);
17236 }
17237 else /* alternative 2,4 */
17238 {
17239 gcc_assert (REGNO (mask) == REGNO (scratch));
17240 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17241 x = gen_rtx_AND (vmode, scratch, op1);
17242 }
17243 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17244
17245 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17246 {
17247 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17248 x = gen_rtx_AND (vmode, dest, nmask);
17249 }
17250 else /* alternative 3,4 */
17251 {
17252 gcc_assert (REGNO (nmask) == REGNO (dest));
17253 dest = nmask;
17254 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17255 x = gen_rtx_AND (vmode, dest, op0);
17256 }
17257 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17258 }
17259
17260 x = gen_rtx_IOR (vmode, dest, scratch);
17261 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17262 }
17263
17264 /* Return TRUE or FALSE depending on whether the first SET in INSN
17265 has source and destination with matching CC modes, and that the
17266 CC mode is at least as constrained as REQ_MODE. */
17267
17268 bool
17269 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17270 {
17271 rtx set;
17272 enum machine_mode set_mode;
17273
17274 set = PATTERN (insn);
17275 if (GET_CODE (set) == PARALLEL)
17276 set = XVECEXP (set, 0, 0);
17277 gcc_assert (GET_CODE (set) == SET);
17278 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17279
17280 set_mode = GET_MODE (SET_DEST (set));
17281 switch (set_mode)
17282 {
17283 case CCNOmode:
17284 if (req_mode != CCNOmode
17285 && (req_mode != CCmode
17286 || XEXP (SET_SRC (set), 1) != const0_rtx))
17287 return false;
17288 break;
17289 case CCmode:
17290 if (req_mode == CCGCmode)
17291 return false;
17292 /* FALLTHRU */
17293 case CCGCmode:
17294 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17295 return false;
17296 /* FALLTHRU */
17297 case CCGOCmode:
17298 if (req_mode == CCZmode)
17299 return false;
17300 /* FALLTHRU */
17301 case CCAmode:
17302 case CCCmode:
17303 case CCOmode:
17304 case CCSmode:
17305 case CCZmode:
17306 break;
17307
17308 default:
17309 gcc_unreachable ();
17310 }
17311
17312 return GET_MODE (SET_SRC (set)) == set_mode;
17313 }
17314
17315 /* Generate insn patterns to do an integer compare of OPERANDS. */
17316
17317 static rtx
17318 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17319 {
17320 enum machine_mode cmpmode;
17321 rtx tmp, flags;
17322
17323 cmpmode = SELECT_CC_MODE (code, op0, op1);
17324 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17325
17326 /* This is very simple, but making the interface the same as in the
17327 FP case makes the rest of the code easier. */
17328 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17329 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17330
17331 /* Return the test that should be put into the flags user, i.e.
17332 the bcc, scc, or cmov instruction. */
17333 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17334 }
17335
17336 /* Figure out whether to use ordered or unordered fp comparisons.
17337 Return the appropriate mode to use. */
17338
17339 enum machine_mode
17340 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17341 {
17342 /* ??? In order to make all comparisons reversible, we do all comparisons
17343 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17344      all forms of trapping and nontrapping comparisons, we can make inequality
17345 comparisons trapping again, since it results in better code when using
17346 FCOM based compares. */
17347 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17348 }
17349
17350 enum machine_mode
17351 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17352 {
17353 enum machine_mode mode = GET_MODE (op0);
17354
17355 if (SCALAR_FLOAT_MODE_P (mode))
17356 {
17357 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17358 return ix86_fp_compare_mode (code);
17359 }
17360
17361 switch (code)
17362 {
17363 /* Only zero flag is needed. */
17364 case EQ: /* ZF=0 */
17365 case NE: /* ZF!=0 */
17366 return CCZmode;
17367 /* Codes needing carry flag. */
17368 case GEU: /* CF=0 */
17369 case LTU: /* CF=1 */
17370 /* Detect overflow checks. They need just the carry flag. */
17371 if (GET_CODE (op0) == PLUS
17372 && rtx_equal_p (op1, XEXP (op0, 0)))
17373 return CCCmode;
17374 else
17375 return CCmode;
17376 case GTU: /* CF=0 & ZF=0 */
17377 case LEU: /* CF=1 | ZF=1 */
17378 /* Detect overflow checks. They need just the carry flag. */
17379 if (GET_CODE (op0) == MINUS
17380 && rtx_equal_p (op1, XEXP (op0, 0)))
17381 return CCCmode;
17382 else
17383 return CCmode;
17384     /* Codes possibly doable only with the sign flag when
17385 comparing against zero. */
17386 case GE: /* SF=OF or SF=0 */
17387 case LT: /* SF<>OF or SF=1 */
17388 if (op1 == const0_rtx)
17389 return CCGOCmode;
17390 else
17391 /* For other cases Carry flag is not required. */
17392 return CCGCmode;
17393     /* Codes doable only with the sign flag when comparing
17394        against zero, but we lack a jump instruction for it,
17395        so we need to use relational tests against the overflow
17396        flag, which thus needs to be zero.  */
17397 case GT: /* ZF=0 & SF=OF */
17398 case LE: /* ZF=1 | SF<>OF */
17399 if (op1 == const0_rtx)
17400 return CCNOmode;
17401 else
17402 return CCGCmode;
17403     /* The strcmp pattern does (use flags) and combine may ask us for the
17404        proper mode.  */
17405 case USE:
17406 return CCmode;
17407 default:
17408 gcc_unreachable ();
17409 }
17410 }
17411
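/* For example, the common unsigned overflow check

     sum = a + b;
     if (sum < a)
       ...

   reaches this function as LTU ((plus a b), a); it matches the PLUS
   test above and gets CCCmode, so only the carry flag needs to be
   valid.  */
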
17412 /* Return the fixed registers used for condition codes. */
17413
17414 static bool
17415 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17416 {
17417 *p1 = FLAGS_REG;
17418 *p2 = FPSR_REG;
17419 return true;
17420 }
17421
17422 /* If two condition code modes are compatible, return a condition code
17423 mode which is compatible with both. Otherwise, return
17424 VOIDmode. */
17425
17426 static enum machine_mode
17427 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17428 {
17429 if (m1 == m2)
17430 return m1;
17431
17432 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17433 return VOIDmode;
17434
17435 if ((m1 == CCGCmode && m2 == CCGOCmode)
17436 || (m1 == CCGOCmode && m2 == CCGCmode))
17437 return CCGCmode;
17438
17439 switch (m1)
17440 {
17441 default:
17442 gcc_unreachable ();
17443
17444 case CCmode:
17445 case CCGCmode:
17446 case CCGOCmode:
17447 case CCNOmode:
17448 case CCAmode:
17449 case CCCmode:
17450 case CCOmode:
17451 case CCSmode:
17452 case CCZmode:
17453 switch (m2)
17454 {
17455 default:
17456 return VOIDmode;
17457
17458 case CCmode:
17459 case CCGCmode:
17460 case CCGOCmode:
17461 case CCNOmode:
17462 case CCAmode:
17463 case CCCmode:
17464 case CCOmode:
17465 case CCSmode:
17466 case CCZmode:
17467 return CCmode;
17468 }
17469
17470 case CCFPmode:
17471 case CCFPUmode:
17472 /* These are only compatible with themselves, which we already
17473 checked above. */
17474 return VOIDmode;
17475 }
17476 }
17477
17478
17479 /* Return a comparison we can do that is equivalent to
17480    swap_condition (code), apart possibly from orderedness.
17481    But never change orderedness if TARGET_IEEE_FP, returning
17482 UNKNOWN in that case if necessary. */
17483
17484 static enum rtx_code
17485 ix86_fp_swap_condition (enum rtx_code code)
17486 {
17487 switch (code)
17488 {
17489 case GT: /* GTU - CF=0 & ZF=0 */
17490 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17491 case GE: /* GEU - CF=0 */
17492 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17493 case UNLT: /* LTU - CF=1 */
17494 return TARGET_IEEE_FP ? UNKNOWN : GT;
17495 case UNLE: /* LEU - CF=1 | ZF=1 */
17496 return TARGET_IEEE_FP ? UNKNOWN : GE;
17497 default:
17498 return swap_condition (code);
17499 }
17500 }
17501
17502 /* Return the cost of comparison CODE using the best strategy for performance.
17503    All of the following functions use the number of instructions as the cost metric.
17504    In the future this should be tweaked to compute bytes for optimize_size and
17505    to take into account the performance of various instructions on various CPUs.  */
17506
17507 static int
17508 ix86_fp_comparison_cost (enum rtx_code code)
17509 {
17510 int arith_cost;
17511
17512 /* The cost of code using bit-twiddling on %ah. */
17513 switch (code)
17514 {
17515 case UNLE:
17516 case UNLT:
17517 case LTGT:
17518 case GT:
17519 case GE:
17520 case UNORDERED:
17521 case ORDERED:
17522 case UNEQ:
17523 arith_cost = 4;
17524 break;
17525 case LT:
17526 case NE:
17527 case EQ:
17528 case UNGE:
17529 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17530 break;
17531 case LE:
17532 case UNGT:
17533 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17534 break;
17535 default:
17536 gcc_unreachable ();
17537 }
17538
17539 switch (ix86_fp_comparison_strategy (code))
17540 {
17541 case IX86_FPCMP_COMI:
17542 return arith_cost > 4 ? 3 : 2;
17543 case IX86_FPCMP_SAHF:
17544 return arith_cost > 4 ? 4 : 3;
17545 default:
17546 return arith_cost;
17547 }
17548 }
17549
17550 /* Return the strategy to use for floating-point comparisons.  We assume that
17551    fcomi is always preferable where available, since that also holds when looking
17552    at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
17553
17554 enum ix86_fpcmp_strategy
17555 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17556 {
17557 /* Do fcomi/sahf based test when profitable. */
17558
17559 if (TARGET_CMOVE)
17560 return IX86_FPCMP_COMI;
17561
17562 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17563 return IX86_FPCMP_SAHF;
17564
17565 return IX86_FPCMP_ARITH;
17566 }
17567
17568 /* Swap, force into registers, or otherwise massage the two operands
17569 to a fp comparison. The operands are updated in place; the new
17570 comparison code is returned. */
17571
17572 static enum rtx_code
17573 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17574 {
17575 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17576 rtx op0 = *pop0, op1 = *pop1;
17577 enum machine_mode op_mode = GET_MODE (op0);
17578 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17579
17580 /* All of the unordered compare instructions only work on registers.
17581 The same is true of the fcomi compare instructions. The XFmode
17582 compare instructions require registers except when comparing
17583 against zero or when converting operand 1 from fixed point to
17584 floating point. */
17585
17586 if (!is_sse
17587 && (fpcmp_mode == CCFPUmode
17588 || (op_mode == XFmode
17589 && ! (standard_80387_constant_p (op0) == 1
17590 || standard_80387_constant_p (op1) == 1)
17591 && GET_CODE (op1) != FLOAT)
17592 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17593 {
17594 op0 = force_reg (op_mode, op0);
17595 op1 = force_reg (op_mode, op1);
17596 }
17597 else
17598 {
17599 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17600 things around if they appear profitable, otherwise force op0
17601 into a register. */
17602
17603 if (standard_80387_constant_p (op0) == 0
17604 || (MEM_P (op0)
17605 && ! (standard_80387_constant_p (op1) == 0
17606 || MEM_P (op1))))
17607 {
17608 enum rtx_code new_code = ix86_fp_swap_condition (code);
17609 if (new_code != UNKNOWN)
17610 {
17611 rtx tmp;
17612 tmp = op0, op0 = op1, op1 = tmp;
17613 code = new_code;
17614 }
17615 }
17616
17617 if (!REG_P (op0))
17618 op0 = force_reg (op_mode, op0);
17619
17620 if (CONSTANT_P (op1))
17621 {
17622 int tmp = standard_80387_constant_p (op1);
17623 if (tmp == 0)
17624 op1 = validize_mem (force_const_mem (op_mode, op1));
17625 else if (tmp == 1)
17626 {
17627 if (TARGET_CMOVE)
17628 op1 = force_reg (op_mode, op1);
17629 }
17630 else
17631 op1 = force_reg (op_mode, op1);
17632 }
17633 }
17634
17635 /* Try to rearrange the comparison to make it cheaper. */
17636 if (ix86_fp_comparison_cost (code)
17637 > ix86_fp_comparison_cost (swap_condition (code))
17638 && (REG_P (op1) || can_create_pseudo_p ()))
17639 {
17640 rtx tmp;
17641 tmp = op0, op0 = op1, op1 = tmp;
17642 code = swap_condition (code);
17643 if (!REG_P (op0))
17644 op0 = force_reg (op_mode, op0);
17645 }
17646
17647 *pop0 = op0;
17648 *pop1 = op1;
17649 return code;
17650 }
17651
17652 /* Convert a comparison code we use to represent an FP comparison into the
17653    integer code that will result in a proper branch.  Return UNKNOWN if no
17654    such code is available.  */
17655
17656 enum rtx_code
17657 ix86_fp_compare_code_to_integer (enum rtx_code code)
17658 {
17659 switch (code)
17660 {
17661 case GT:
17662 return GTU;
17663 case GE:
17664 return GEU;
17665 case ORDERED:
17666 case UNORDERED:
17667 return code;
17668 break;
17669 case UNEQ:
17670 return EQ;
17671 break;
17672 case UNLT:
17673 return LTU;
17674 break;
17675 case UNLE:
17676 return LEU;
17677 break;
17678 case LTGT:
17679 return NE;
17680 break;
17681 default:
17682 return UNKNOWN;
17683 }
17684 }
17685
17686 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17687
17688 static rtx
17689 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17690 {
17691 enum machine_mode fpcmp_mode, intcmp_mode;
17692 rtx tmp, tmp2;
17693
17694 fpcmp_mode = ix86_fp_compare_mode (code);
17695 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17696
17697 /* Do fcomi/sahf based test when profitable. */
17698 switch (ix86_fp_comparison_strategy (code))
17699 {
17700 case IX86_FPCMP_COMI:
17701 intcmp_mode = fpcmp_mode;
17702 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17703 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17704 tmp);
17705 emit_insn (tmp);
17706 break;
17707
17708 case IX86_FPCMP_SAHF:
17709 intcmp_mode = fpcmp_mode;
17710 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17711 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17712 tmp);
17713
17714 if (!scratch)
17715 scratch = gen_reg_rtx (HImode);
17716 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17717 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17718 break;
17719
17720 case IX86_FPCMP_ARITH:
17721 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17722 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17723 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17724 if (!scratch)
17725 scratch = gen_reg_rtx (HImode);
17726 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17727
17728 /* In the unordered case, we have to check C2 for NaN's, which
17729 doesn't happen to work out to anything nice combination-wise.
17730 So do some bit twiddling on the value we've got in AH to come
17731 up with an appropriate set of condition codes. */
17732
17733 intcmp_mode = CCNOmode;
17734 switch (code)
17735 {
17736 case GT:
17737 case UNGT:
17738 if (code == GT || !TARGET_IEEE_FP)
17739 {
17740 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17741 code = EQ;
17742 }
17743 else
17744 {
17745 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17746 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17747 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17748 intcmp_mode = CCmode;
17749 code = GEU;
17750 }
17751 break;
17752 case LT:
17753 case UNLT:
17754 if (code == LT && TARGET_IEEE_FP)
17755 {
17756 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17757 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17758 intcmp_mode = CCmode;
17759 code = EQ;
17760 }
17761 else
17762 {
17763 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17764 code = NE;
17765 }
17766 break;
17767 case GE:
17768 case UNGE:
17769 if (code == GE || !TARGET_IEEE_FP)
17770 {
17771 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17772 code = EQ;
17773 }
17774 else
17775 {
17776 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17777 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17778 code = NE;
17779 }
17780 break;
17781 case LE:
17782 case UNLE:
17783 if (code == LE && TARGET_IEEE_FP)
17784 {
17785 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17786 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17787 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17788 intcmp_mode = CCmode;
17789 code = LTU;
17790 }
17791 else
17792 {
17793 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17794 code = NE;
17795 }
17796 break;
17797 case EQ:
17798 case UNEQ:
17799 if (code == EQ && TARGET_IEEE_FP)
17800 {
17801 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17802 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17803 intcmp_mode = CCmode;
17804 code = EQ;
17805 }
17806 else
17807 {
17808 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17809 code = NE;
17810 }
17811 break;
17812 case NE:
17813 case LTGT:
17814 if (code == NE && TARGET_IEEE_FP)
17815 {
17816 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17817 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17818 GEN_INT (0x40)));
17819 code = NE;
17820 }
17821 else
17822 {
17823 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17824 code = EQ;
17825 }
17826 break;
17827
17828 case UNORDERED:
17829 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17830 code = NE;
17831 break;
17832 case ORDERED:
17833 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17834 code = EQ;
17835 break;
17836
17837 default:
17838 gcc_unreachable ();
17839 }
17840 break;
17841
17842 default:
17843 gcc_unreachable();
17844 }
17845
17846 /* Return the test that should be put into the flags user, i.e.
17847 the bcc, scc, or cmov instruction. */
17848 return gen_rtx_fmt_ee (code, VOIDmode,
17849 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17850 const0_rtx);
17851 }
17852
17853 static rtx
17854 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17855 {
17856 rtx ret;
17857
17858 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17859 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17860
17861 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17862 {
17863 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17864 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17865 }
17866 else
17867 ret = ix86_expand_int_compare (code, op0, op1);
17868
17869 return ret;
17870 }
17871
17872 void
17873 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17874 {
17875 enum machine_mode mode = GET_MODE (op0);
17876 rtx tmp;
17877
17878 switch (mode)
17879 {
17880 case SFmode:
17881 case DFmode:
17882 case XFmode:
17883 case QImode:
17884 case HImode:
17885 case SImode:
17886 simple:
17887 tmp = ix86_expand_compare (code, op0, op1);
17888 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17889 gen_rtx_LABEL_REF (VOIDmode, label),
17890 pc_rtx);
17891 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17892 return;
17893
17894 case DImode:
17895 if (TARGET_64BIT)
17896 goto simple;
17897 case TImode:
17898 /* Expand DImode branch into multiple compare+branch. */
17899 {
17900 rtx lo[2], hi[2], label2;
17901 enum rtx_code code1, code2, code3;
17902 enum machine_mode submode;
17903
17904 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17905 {
17906 tmp = op0, op0 = op1, op1 = tmp;
17907 code = swap_condition (code);
17908 }
17909
17910 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17911 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17912
17913 submode = mode == DImode ? SImode : DImode;
17914
17915 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17916 avoid two branches. This costs one extra insn, so disable when
17917 optimizing for size. */
17918
17919 if ((code == EQ || code == NE)
17920 && (!optimize_insn_for_size_p ()
17921 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17922 {
17923 rtx xor0, xor1;
17924
17925 xor1 = hi[0];
17926 if (hi[1] != const0_rtx)
17927 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17928 NULL_RTX, 0, OPTAB_WIDEN);
17929
17930 xor0 = lo[0];
17931 if (lo[1] != const0_rtx)
17932 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17933 NULL_RTX, 0, OPTAB_WIDEN);
17934
17935 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17936 NULL_RTX, 0, OPTAB_WIDEN);
17937
17938 ix86_expand_branch (code, tmp, const0_rtx, label);
17939 return;
17940 }
17941
17942 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17943 op1 is a constant and the low word is zero, then we can just
17944 examine the high word. Similarly for low word -1 and
17945 less-or-equal-than or greater-than. */
17946
17947 if (CONST_INT_P (hi[1]))
17948 switch (code)
17949 {
17950 case LT: case LTU: case GE: case GEU:
17951 if (lo[1] == const0_rtx)
17952 {
17953 ix86_expand_branch (code, hi[0], hi[1], label);
17954 return;
17955 }
17956 break;
17957 case LE: case LEU: case GT: case GTU:
17958 if (lo[1] == constm1_rtx)
17959 {
17960 ix86_expand_branch (code, hi[0], hi[1], label);
17961 return;
17962 }
17963 break;
17964 default:
17965 break;
17966 }
17967
17968 /* Otherwise, we need two or three jumps. */
17969
17970 label2 = gen_label_rtx ();
17971
17972 code1 = code;
17973 code2 = swap_condition (code);
17974 code3 = unsigned_condition (code);
17975
17976 switch (code)
17977 {
17978 case LT: case GT: case LTU: case GTU:
17979 break;
17980
17981 case LE: code1 = LT; code2 = GT; break;
17982 case GE: code1 = GT; code2 = LT; break;
17983 case LEU: code1 = LTU; code2 = GTU; break;
17984 case GEU: code1 = GTU; code2 = LTU; break;
17985
17986 case EQ: code1 = UNKNOWN; code2 = NE; break;
17987 case NE: code2 = UNKNOWN; break;
17988
17989 default:
17990 gcc_unreachable ();
17991 }
17992
17993 /*
17994 * a < b =>
17995 * if (hi(a) < hi(b)) goto true;
17996 * if (hi(a) > hi(b)) goto false;
17997 * if (lo(a) < lo(b)) goto true;
17998 * false:
17999 */
18000
18001 if (code1 != UNKNOWN)
18002 ix86_expand_branch (code1, hi[0], hi[1], label);
18003 if (code2 != UNKNOWN)
18004 ix86_expand_branch (code2, hi[0], hi[1], label2);
18005
18006 ix86_expand_branch (code3, lo[0], lo[1], label);
18007
18008 if (code2 != UNKNOWN)
18009 emit_label (label2);
18010 return;
18011 }
18012
18013 default:
18014 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
18015 goto simple;
18016 }
18017 }
18018
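/* As a concrete instance of the expansion above, a 64-bit "a == b"
   branch on 32-bit x86 is emitted (when not optimizing for size, or
   when one half of the constant is zero) roughly as

       xor     lo(a), lo(b)  -> t1
       xor     hi(a), hi(b)  -> t2
       or      t1, t2
       je      label

   while an ordered compare such as "a < b" falls back to the two or
   three jump sequence sketched in the comment inside the DImode/TImode
   case.  */
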
18019 /* Split branch based on floating point condition. */
18020 void
18021 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18022 rtx target1, rtx target2, rtx tmp, rtx pushed)
18023 {
18024 rtx condition;
18025 rtx i;
18026
18027 if (target2 != pc_rtx)
18028 {
18029 rtx tmp = target2;
18030 code = reverse_condition_maybe_unordered (code);
18031 target2 = target1;
18032 target1 = tmp;
18033 }
18034
18035 condition = ix86_expand_fp_compare (code, op1, op2,
18036 tmp);
18037
18038 /* Remove pushed operand from stack. */
18039 if (pushed)
18040 ix86_free_from_memory (GET_MODE (pushed));
18041
18042 i = emit_jump_insn (gen_rtx_SET
18043 (VOIDmode, pc_rtx,
18044 gen_rtx_IF_THEN_ELSE (VOIDmode,
18045 condition, target1, target2)));
18046 if (split_branch_probability >= 0)
18047 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18048 }
18049
18050 void
18051 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18052 {
18053 rtx ret;
18054
18055 gcc_assert (GET_MODE (dest) == QImode);
18056
18057 ret = ix86_expand_compare (code, op0, op1);
18058 PUT_MODE (ret, QImode);
18059 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18060 }
18061
18062 /* Expand comparison setting or clearing carry flag. Return true when
18063 successful and set pop for the operation. */
18064 static bool
18065 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18066 {
18067 enum machine_mode mode =
18068 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18069
18070 /* Do not handle double-mode compares that go through special path. */
18071 if (mode == (TARGET_64BIT ? TImode : DImode))
18072 return false;
18073
18074 if (SCALAR_FLOAT_MODE_P (mode))
18075 {
18076 rtx compare_op, compare_seq;
18077
18078 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18079
18080       /* Shortcut:  the following common codes never translate
18081 into carry flag compares. */
18082 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18083 || code == ORDERED || code == UNORDERED)
18084 return false;
18085
18086       /* These comparisons require the zero flag; swap the operands so they won't.  */
18087 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18088 && !TARGET_IEEE_FP)
18089 {
18090 rtx tmp = op0;
18091 op0 = op1;
18092 op1 = tmp;
18093 code = swap_condition (code);
18094 }
18095
18096 /* Try to expand the comparison and verify that we end up with
18097 	 a carry flag based comparison.  This fails to be true only when
18098 	 we decide to expand the comparison using arithmetic, which is not
18099 	 a very common scenario.  */
18100 start_sequence ();
18101 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18102 compare_seq = get_insns ();
18103 end_sequence ();
18104
18105 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18106 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18107 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18108 else
18109 code = GET_CODE (compare_op);
18110
18111 if (code != LTU && code != GEU)
18112 return false;
18113
18114 emit_insn (compare_seq);
18115 *pop = compare_op;
18116 return true;
18117 }
18118
18119 if (!INTEGRAL_MODE_P (mode))
18120 return false;
18121
18122 switch (code)
18123 {
18124 case LTU:
18125 case GEU:
18126 break;
18127
18128 /* Convert a==0 into (unsigned)a<1. */
18129 case EQ:
18130 case NE:
18131 if (op1 != const0_rtx)
18132 return false;
18133 op1 = const1_rtx;
18134 code = (code == EQ ? LTU : GEU);
18135 break;
18136
18137     /* Convert a>b into b<a or a>=b+1.  */
18138 case GTU:
18139 case LEU:
18140 if (CONST_INT_P (op1))
18141 {
18142 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18143 	  /* Bail out on overflow.  We could still swap the operands, but that
18144 	     would force loading the constant into a register.  */
18145 if (op1 == const0_rtx
18146 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18147 return false;
18148 code = (code == GTU ? GEU : LTU);
18149 }
18150 else
18151 {
18152 rtx tmp = op1;
18153 op1 = op0;
18154 op0 = tmp;
18155 code = (code == GTU ? LTU : GEU);
18156 }
18157 break;
18158
18159 /* Convert a>=0 into (unsigned)a<0x80000000. */
18160 case LT:
18161 case GE:
18162 if (mode == DImode || op1 != const0_rtx)
18163 return false;
18164 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18165 code = (code == LT ? GEU : LTU);
18166 break;
18167 case LE:
18168 case GT:
18169 if (mode == DImode || op1 != constm1_rtx)
18170 return false;
18171 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18172 code = (code == LE ? GEU : LTU);
18173 break;
18174
18175 default:
18176 return false;
18177 }
18178 /* Swapping operands may cause constant to appear as first operand. */
18179 if (!nonimmediate_operand (op0, VOIDmode))
18180 {
18181 if (!can_create_pseudo_p ())
18182 return false;
18183 op0 = force_reg (mode, op0);
18184 }
18185 *pop = ix86_expand_compare (code, op0, op1);
18186 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
18187 return true;
18188 }
18189
18190 bool
18191 ix86_expand_int_movcc (rtx operands[])
18192 {
18193 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18194 rtx compare_seq, compare_op;
18195 enum machine_mode mode = GET_MODE (operands[0]);
18196 bool sign_bit_compare_p = false;
18197 rtx op0 = XEXP (operands[1], 0);
18198 rtx op1 = XEXP (operands[1], 1);
18199
18200 start_sequence ();
18201 compare_op = ix86_expand_compare (code, op0, op1);
18202 compare_seq = get_insns ();
18203 end_sequence ();
18204
18205 compare_code = GET_CODE (compare_op);
18206
18207 if ((op1 == const0_rtx && (code == GE || code == LT))
18208 || (op1 == constm1_rtx && (code == GT || code == LE)))
18209 sign_bit_compare_p = true;
18210
18211 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18212 HImode insns, we'd be swallowed in word prefix ops. */
18213
18214 if ((mode != HImode || TARGET_FAST_PREFIX)
18215 && (mode != (TARGET_64BIT ? TImode : DImode))
18216 && CONST_INT_P (operands[2])
18217 && CONST_INT_P (operands[3]))
18218 {
18219 rtx out = operands[0];
18220 HOST_WIDE_INT ct = INTVAL (operands[2]);
18221 HOST_WIDE_INT cf = INTVAL (operands[3]);
18222 HOST_WIDE_INT diff;
18223
18224 diff = ct - cf;
18225 /* Sign bit compares are better done using shifts than by using
18226 sbb. */
18227 if (sign_bit_compare_p
18228 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18229 {
18230 /* Detect overlap between destination and compare sources. */
18231 rtx tmp = out;
18232
18233 if (!sign_bit_compare_p)
18234 {
18235 rtx flags;
18236 bool fpcmp = false;
18237
18238 compare_code = GET_CODE (compare_op);
18239
18240 flags = XEXP (compare_op, 0);
18241
18242 if (GET_MODE (flags) == CCFPmode
18243 || GET_MODE (flags) == CCFPUmode)
18244 {
18245 fpcmp = true;
18246 compare_code
18247 = ix86_fp_compare_code_to_integer (compare_code);
18248 }
18249
18250 /* To simplify rest of code, restrict to the GEU case. */
18251 if (compare_code == LTU)
18252 {
18253 HOST_WIDE_INT tmp = ct;
18254 ct = cf;
18255 cf = tmp;
18256 compare_code = reverse_condition (compare_code);
18257 code = reverse_condition (code);
18258 }
18259 else
18260 {
18261 if (fpcmp)
18262 PUT_CODE (compare_op,
18263 reverse_condition_maybe_unordered
18264 (GET_CODE (compare_op)));
18265 else
18266 PUT_CODE (compare_op,
18267 reverse_condition (GET_CODE (compare_op)));
18268 }
18269 diff = ct - cf;
18270
18271 if (reg_overlap_mentioned_p (out, op0)
18272 || reg_overlap_mentioned_p (out, op1))
18273 tmp = gen_reg_rtx (mode);
18274
18275 if (mode == DImode)
18276 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18277 else
18278 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18279 flags, compare_op));
18280 }
18281 else
18282 {
18283 if (code == GT || code == GE)
18284 code = reverse_condition (code);
18285 else
18286 {
18287 HOST_WIDE_INT tmp = ct;
18288 ct = cf;
18289 cf = tmp;
18290 diff = ct - cf;
18291 }
18292 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18293 }
18294
18295 if (diff == 1)
18296 {
18297 /*
18298 * cmpl op0,op1
18299 * sbbl dest,dest
18300 * [addl dest, ct]
18301 *
18302 * Size 5 - 8.
18303 */
18304 if (ct)
18305 tmp = expand_simple_binop (mode, PLUS,
18306 tmp, GEN_INT (ct),
18307 copy_rtx (tmp), 1, OPTAB_DIRECT);
18308 }
18309 else if (cf == -1)
18310 {
18311 /*
18312 * cmpl op0,op1
18313 * sbbl dest,dest
18314 * orl $ct, dest
18315 *
18316 * Size 8.
18317 */
18318 tmp = expand_simple_binop (mode, IOR,
18319 tmp, GEN_INT (ct),
18320 copy_rtx (tmp), 1, OPTAB_DIRECT);
18321 }
18322 else if (diff == -1 && ct)
18323 {
18324 /*
18325 * cmpl op0,op1
18326 * sbbl dest,dest
18327 * notl dest
18328 * [addl dest, cf]
18329 *
18330 * Size 8 - 11.
18331 */
18332 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18333 if (cf)
18334 tmp = expand_simple_binop (mode, PLUS,
18335 copy_rtx (tmp), GEN_INT (cf),
18336 copy_rtx (tmp), 1, OPTAB_DIRECT);
18337 }
18338 else
18339 {
18340 /*
18341 * cmpl op0,op1
18342 * sbbl dest,dest
18343 * [notl dest]
18344 * andl cf - ct, dest
18345 * [addl dest, ct]
18346 *
18347 * Size 8 - 11.
18348 */
18349
18350 if (cf == 0)
18351 {
18352 cf = ct;
18353 ct = 0;
18354 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18355 }
18356
18357 tmp = expand_simple_binop (mode, AND,
18358 copy_rtx (tmp),
18359 gen_int_mode (cf - ct, mode),
18360 copy_rtx (tmp), 1, OPTAB_DIRECT);
18361 if (ct)
18362 tmp = expand_simple_binop (mode, PLUS,
18363 copy_rtx (tmp), GEN_INT (ct),
18364 copy_rtx (tmp), 1, OPTAB_DIRECT);
18365 }
18366
18367 if (!rtx_equal_p (tmp, out))
18368 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
18369
18370 return true;
18371 }
18372
18373 if (diff < 0)
18374 {
18375 enum machine_mode cmp_mode = GET_MODE (op0);
18376
18377 HOST_WIDE_INT tmp;
18378 tmp = ct, ct = cf, cf = tmp;
18379 diff = -diff;
18380
18381 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18382 {
18383 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18384
18385 /* We may be reversing an unordered compare to a normal compare, which
18386 is not valid in general (we may convert a non-trapping condition
18387 into a trapping one); however, on i386 we currently emit all
18388 comparisons unordered. */
18389 compare_code = reverse_condition_maybe_unordered (compare_code);
18390 code = reverse_condition_maybe_unordered (code);
18391 }
18392 else
18393 {
18394 compare_code = reverse_condition (compare_code);
18395 code = reverse_condition (code);
18396 }
18397 }
18398
18399 compare_code = UNKNOWN;
18400 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18401 && CONST_INT_P (op1))
18402 {
18403 if (op1 == const0_rtx
18404 && (code == LT || code == GE))
18405 compare_code = code;
18406 else if (op1 == constm1_rtx)
18407 {
18408 if (code == LE)
18409 compare_code = LT;
18410 else if (code == GT)
18411 compare_code = GE;
18412 }
18413 }
18414
18415 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18416 if (compare_code != UNKNOWN
18417 && GET_MODE (op0) == GET_MODE (out)
18418 && (cf == -1 || ct == -1))
18419 {
18420 /* If lea code below could be used, only optimize
18421 if it results in a 2 insn sequence. */
18422
18423 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18424 || diff == 3 || diff == 5 || diff == 9)
18425 || (compare_code == LT && ct == -1)
18426 || (compare_code == GE && cf == -1))
18427 {
18428 /*
18429 * notl op1 (if necessary)
18430 * sarl $31, op1
18431 * orl cf, op1
18432 */
18433 if (ct != -1)
18434 {
18435 cf = ct;
18436 ct = -1;
18437 code = reverse_condition (code);
18438 }
18439
18440 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18441
18442 out = expand_simple_binop (mode, IOR,
18443 out, GEN_INT (cf),
18444 out, 1, OPTAB_DIRECT);
18445 if (out != operands[0])
18446 emit_move_insn (operands[0], out);
18447
18448 return true;
18449 }
18450 }
18451
18452
18453 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18454 || diff == 3 || diff == 5 || diff == 9)
18455 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18456 && (mode != DImode
18457 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18458 {
18459 /*
18460 * xorl dest,dest
18461 * cmpl op1,op2
18462 * setcc dest
18463 * lea cf(dest*(ct-cf)),dest
18464 *
18465 * Size 14.
18466 *
18467 * This also catches the degenerate setcc-only case.
18468 */
18469
18470 rtx tmp;
18471 int nops;
18472
18473 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18474
18475 nops = 0;
18476 /* On x86_64 the lea instruction operates on Pmode, so we need
18477 to get the arithmetic done in the proper mode to match. */
18478 if (diff == 1)
18479 tmp = copy_rtx (out);
18480 else
18481 {
18482 rtx out1;
18483 out1 = copy_rtx (out);
18484 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18485 nops++;
18486 if (diff & 1)
18487 {
18488 tmp = gen_rtx_PLUS (mode, tmp, out1);
18489 nops++;
18490 }
18491 }
18492 if (cf != 0)
18493 {
18494 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18495 nops++;
18496 }
18497 if (!rtx_equal_p (tmp, out))
18498 {
18499 if (nops == 1)
18500 out = force_operand (tmp, copy_rtx (out));
18501 else
18502 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18503 }
18504 if (!rtx_equal_p (out, operands[0]))
18505 emit_move_insn (operands[0], copy_rtx (out));
18506
18507 return true;
18508 }
18509
18510 /*
18511 * General case: Jumpful:
18512 * xorl dest,dest cmpl op1, op2
18513 * cmpl op1, op2 movl ct, dest
18514 * setcc dest jcc 1f
18515 * decl dest movl cf, dest
18516 * andl (cf-ct),dest 1:
18517 * addl ct,dest
18518 *
18519 * Size 20. Size 14.
18520 *
18521 * This is reasonably steep, but branch mispredict costs are
18522 * high on modern cpus, so consider failing only if optimizing
18523 * for space.
18524 */
18525
18526 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18527 && BRANCH_COST (optimize_insn_for_speed_p (),
18528 false) >= 2)
18529 {
18530 if (cf == 0)
18531 {
18532 enum machine_mode cmp_mode = GET_MODE (op0);
18533
18534 cf = ct;
18535 ct = 0;
18536
18537 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18538 {
18539 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18540
18541 /* We may be reversing an unordered compare to a normal compare,
18542 which is not valid in general (we may convert a non-trapping
18543 condition into a trapping one); however, on i386 we currently
18544 emit all comparisons unordered. */
18545 code = reverse_condition_maybe_unordered (code);
18546 }
18547 else
18548 {
18549 code = reverse_condition (code);
18550 if (compare_code != UNKNOWN)
18551 compare_code = reverse_condition (compare_code);
18552 }
18553 }
18554
18555 if (compare_code != UNKNOWN)
18556 {
18557 /* notl op1 (if needed)
18558 sarl $31, op1
18559 andl (cf-ct), op1
18560 addl ct, op1
18561
18562 For x < 0 (resp. x <= -1) there will be no notl,
18563 so if possible swap the constants to get rid of the
18564 complement.
18565 True/false will be -1/0 while code below (store flag
18566 followed by decrement) is 0/-1, so the constants need
18567 to be exchanged once more. */
18568
18569 if (compare_code == GE || !cf)
18570 {
18571 code = reverse_condition (code);
18572 compare_code = LT;
18573 }
18574 else
18575 {
18576 HOST_WIDE_INT tmp = cf;
18577 cf = ct;
18578 ct = tmp;
18579 }
18580
18581 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18582 }
18583 else
18584 {
18585 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18586
18587 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18588 constm1_rtx,
18589 copy_rtx (out), 1, OPTAB_DIRECT);
18590 }
18591
18592 out = expand_simple_binop (mode, AND, copy_rtx (out),
18593 gen_int_mode (cf - ct, mode),
18594 copy_rtx (out), 1, OPTAB_DIRECT);
18595 if (ct)
18596 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18597 copy_rtx (out), 1, OPTAB_DIRECT);
18598 if (!rtx_equal_p (out, operands[0]))
18599 emit_move_insn (operands[0], copy_rtx (out));
18600
18601 return true;
18602 }
18603 }
18604
18605 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18606 {
18607 /* Try a few more things with specific constants and a variable. */
18608
18609 optab op;
18610 rtx var, orig_out, out, tmp;
18611
18612 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18613 return false;
18614
18615 /* If one of the two operands is an interesting constant, load a
18616 constant with the above and mask it in with a logical operation. */
18617
18618 if (CONST_INT_P (operands[2]))
18619 {
18620 var = operands[3];
18621 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18622 operands[3] = constm1_rtx, op = and_optab;
18623 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18624 operands[3] = const0_rtx, op = ior_optab;
18625 else
18626 return false;
18627 }
18628 else if (CONST_INT_P (operands[3]))
18629 {
18630 var = operands[2];
18631 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18632 operands[2] = constm1_rtx, op = and_optab;
18633 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18634 operands[2] = const0_rtx, op = ior_optab;
18635 else
18636 return false;
18637 }
18638 else
18639 return false;
18640
18641 orig_out = operands[0];
18642 tmp = gen_reg_rtx (mode);
18643 operands[0] = tmp;
18644
18645 /* Recurse to get the constant loaded. */
18646 if (ix86_expand_int_movcc (operands) == 0)
18647 return false;
18648
18649 /* Mask in the interesting variable. */
18650 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18651 OPTAB_WIDEN);
18652 if (!rtx_equal_p (out, orig_out))
18653 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18654
18655 return true;
18656 }
18657
18658 /*
18659 * For comparison with above,
18660 *
18661 * movl cf,dest
18662 * movl ct,tmp
18663 * cmpl op1,op2
18664 * cmovcc tmp,dest
18665 *
18666 * Size 15.
18667 */
18668
18669 if (! nonimmediate_operand (operands[2], mode))
18670 operands[2] = force_reg (mode, operands[2]);
18671 if (! nonimmediate_operand (operands[3], mode))
18672 operands[3] = force_reg (mode, operands[3]);
18673
18674 if (! register_operand (operands[2], VOIDmode)
18675 && (mode == QImode
18676 || ! register_operand (operands[3], VOIDmode)))
18677 operands[2] = force_reg (mode, operands[2]);
18678
18679 if (mode == QImode
18680 && ! register_operand (operands[3], VOIDmode))
18681 operands[3] = force_reg (mode, operands[3]);
18682
18683 emit_insn (compare_seq);
18684 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18685 gen_rtx_IF_THEN_ELSE (mode,
18686 compare_op, operands[2],
18687 operands[3])));
18688 return true;
18689 }
18690
18691 /* Swap, force into registers, or otherwise massage the two operands
18692 to an sse comparison with a mask result. Thus we differ a bit from
18693 ix86_prepare_fp_compare_args which expects to produce a flags result.
18694
18695 The DEST operand exists to help determine whether to commute commutative
18696 operators. The POP0/POP1 operands are updated in place. The new
18697 comparison code is returned, or UNKNOWN if not implementable. */
18698
18699 static enum rtx_code
18700 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18701 rtx *pop0, rtx *pop1)
18702 {
18703 rtx tmp;
18704
18705 switch (code)
18706 {
18707 case LTGT:
18708 case UNEQ:
18709 /* We have no LTGT as an operator. We could implement it with
18710 NE & ORDERED, but this requires an extra temporary. It's
18711 not clear that it's worth it. */
18712 return UNKNOWN;
18713
18714 case LT:
18715 case LE:
18716 case UNGT:
18717 case UNGE:
18718 /* These are supported directly. */
18719 break;
18720
18721 case EQ:
18722 case NE:
18723 case UNORDERED:
18724 case ORDERED:
18725 /* For commutative operators, try to canonicalize the destination
18726 operand to be first in the comparison - this helps reload to
18727 avoid extra moves. */
18728 if (!dest || !rtx_equal_p (dest, *pop1))
18729 break;
18730 /* FALLTHRU */
18731
18732 case GE:
18733 case GT:
18734 case UNLE:
18735 case UNLT:
18736 /* These are not supported directly. Swap the comparison operands
18737 to transform into something that is supported. */
18738 tmp = *pop0;
18739 *pop0 = *pop1;
18740 *pop1 = tmp;
18741 code = swap_condition (code);
18742 break;
18743
18744 default:
18745 gcc_unreachable ();
18746 }
18747
18748 return code;
18749 }
18750
18751 /* Detect conditional moves that exactly match min/max operational
18752 semantics. Note that this is IEEE safe, as long as we don't
18753 interchange the operands.
18754
18755 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18756 and TRUE if the operation is successful and instructions are emitted. */
18757
18758 static bool
18759 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18760 rtx cmp_op1, rtx if_true, rtx if_false)
18761 {
18762 enum machine_mode mode;
18763 bool is_min;
18764 rtx tmp;
18765
18766 if (code == LT)
18767 ;
18768 else if (code == UNGE)
18769 {
18770 tmp = if_true;
18771 if_true = if_false;
18772 if_false = tmp;
18773 }
18774 else
18775 return false;
18776
18777 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18778 is_min = true;
18779 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18780 is_min = false;
18781 else
18782 return false;
18783
18784 mode = GET_MODE (dest);
18785
18786 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18787 but MODE may be a vector mode and thus not appropriate. */
18788 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18789 {
18790 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18791 rtvec v;
18792
18793 if_true = force_reg (mode, if_true);
18794 v = gen_rtvec (2, if_true, if_false);
18795 tmp = gen_rtx_UNSPEC (mode, v, u);
18796 }
18797 else
18798 {
18799 code = is_min ? SMIN : SMAX;
18800 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18801 }
18802
18803 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18804 return true;
18805 }
18806
18807 /* Expand an sse vector comparison. Return the register with the result. */
18808
18809 static rtx
18810 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18811 rtx op_true, rtx op_false)
18812 {
18813 enum machine_mode mode = GET_MODE (dest);
18814 rtx x;
18815
18816 cmp_op0 = force_reg (mode, cmp_op0);
18817 if (!nonimmediate_operand (cmp_op1, mode))
18818 cmp_op1 = force_reg (mode, cmp_op1);
18819
18820 if (optimize
18821 || reg_overlap_mentioned_p (dest, op_true)
18822 || reg_overlap_mentioned_p (dest, op_false))
18823 dest = gen_reg_rtx (mode);
18824
18825 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18826 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18827
18828 return dest;
18829 }
18830
18831 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18832 operations. This is used for both scalar and vector conditional moves. */
18833
18834 static void
18835 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18836 {
18837 enum machine_mode mode = GET_MODE (dest);
18838 rtx t2, t3, x;
18839
18840 if (op_false == CONST0_RTX (mode))
18841 {
18842 op_true = force_reg (mode, op_true);
18843 x = gen_rtx_AND (mode, cmp, op_true);
18844 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18845 }
18846 else if (op_true == CONST0_RTX (mode))
18847 {
18848 op_false = force_reg (mode, op_false);
18849 x = gen_rtx_NOT (mode, cmp);
18850 x = gen_rtx_AND (mode, x, op_false);
18851 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18852 }
18853 else if (TARGET_XOP)
18854 {
18855 rtx pcmov = gen_rtx_SET (mode, dest,
18856 gen_rtx_IF_THEN_ELSE (mode, cmp,
18857 op_true,
18858 op_false));
18859 emit_insn (pcmov);
18860 }
18861 else
18862 {
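      /* No native blend available: compute
	 dest = (cmp & op_true) | (~cmp & op_false)
	 using three logical operations on registers.  */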
18863 op_true = force_reg (mode, op_true);
18864 op_false = force_reg (mode, op_false);
18865
18866 t2 = gen_reg_rtx (mode);
18867 if (optimize)
18868 t3 = gen_reg_rtx (mode);
18869 else
18870 t3 = dest;
18871
18872 x = gen_rtx_AND (mode, op_true, cmp);
18873 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18874
18875 x = gen_rtx_NOT (mode, cmp);
18876 x = gen_rtx_AND (mode, x, op_false);
18877 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18878
18879 x = gen_rtx_IOR (mode, t3, t2);
18880 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18881 }
18882 }
18883
18884 /* Expand a floating-point conditional move. Return true if successful. */
18885
18886 bool
18887 ix86_expand_fp_movcc (rtx operands[])
18888 {
18889 enum machine_mode mode = GET_MODE (operands[0]);
18890 enum rtx_code code = GET_CODE (operands[1]);
18891 rtx tmp, compare_op;
18892 rtx op0 = XEXP (operands[1], 0);
18893 rtx op1 = XEXP (operands[1], 1);
18894
18895 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18896 {
18897 enum machine_mode cmode;
18898
18899 /* Since we've no cmove for sse registers, don't force bad register
18900 allocation just to gain access to it. Deny movcc when the
18901 comparison mode doesn't match the move mode. */
18902 cmode = GET_MODE (op0);
18903 if (cmode == VOIDmode)
18904 cmode = GET_MODE (op1);
18905 if (cmode != mode)
18906 return false;
18907
18908 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18909 if (code == UNKNOWN)
18910 return false;
18911
18912 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18913 operands[2], operands[3]))
18914 return true;
18915
18916 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18917 operands[2], operands[3]);
18918 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18919 return true;
18920 }
18921
18922 /* The floating point conditional move instructions don't directly
18923 support conditions resulting from a signed integer comparison. */
18924
18925 compare_op = ix86_expand_compare (code, op0, op1);
18926 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18927 {
18928 tmp = gen_reg_rtx (QImode);
18929 ix86_expand_setcc (tmp, code, op0, op1);
18930
18931 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18932 }
18933
18934 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18935 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18936 operands[2], operands[3])));
18937
18938 return true;
18939 }
18940
18941 /* Expand a floating-point vector conditional move; a vcond operation
18942 rather than a movcc operation. */
18943
18944 bool
18945 ix86_expand_fp_vcond (rtx operands[])
18946 {
18947 enum rtx_code code = GET_CODE (operands[3]);
18948 rtx cmp;
18949
18950 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18951 &operands[4], &operands[5]);
18952 if (code == UNKNOWN)
18953 return false;
18954
18955 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18956 operands[5], operands[1], operands[2]))
18957 return true;
18958
18959 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18960 operands[1], operands[2]);
18961 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18962 return true;
18963 }
18964
18965 /* Expand a signed/unsigned integral vector conditional move. */
18966
18967 bool
18968 ix86_expand_int_vcond (rtx operands[])
18969 {
18970 enum machine_mode mode = GET_MODE (operands[0]);
18971 enum rtx_code code = GET_CODE (operands[3]);
18972 bool negate = false;
18973 rtx x, cop0, cop1;
18974
18975 cop0 = operands[4];
18976 cop1 = operands[5];
18977
18978 /* XOP supports all of the comparisons on all vector int types. */
18979 if (!TARGET_XOP)
18980 {
18981 /* Canonicalize the comparison to EQ, GT, GTU. */
18982 switch (code)
18983 {
18984 case EQ:
18985 case GT:
18986 case GTU:
18987 break;
18988
18989 case NE:
18990 case LE:
18991 case LEU:
18992 code = reverse_condition (code);
18993 negate = true;
18994 break;
18995
18996 case GE:
18997 case GEU:
18998 code = reverse_condition (code);
18999 negate = true;
19000 /* FALLTHRU */
19001
19002 case LT:
19003 case LTU:
19004 code = swap_condition (code);
19005 x = cop0, cop0 = cop1, cop1 = x;
19006 break;
19007
19008 default:
19009 gcc_unreachable ();
19010 }
19011
19012 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19013 if (mode == V2DImode)
19014 {
19015 switch (code)
19016 {
19017 case EQ:
19018 /* SSE4.1 supports EQ. */
19019 if (!TARGET_SSE4_1)
19020 return false;
19021 break;
19022
19023 case GT:
19024 case GTU:
19025 /* SSE4.2 supports GT/GTU. */
19026 if (!TARGET_SSE4_2)
19027 return false;
19028 break;
19029
19030 default:
19031 gcc_unreachable ();
19032 }
19033 }
19034
19035 /* Unsigned parallel compare is not supported by the hardware.
19036 Play some tricks to turn this into a signed comparison
19037 or a comparison against zero. */
19038 if (code == GTU)
19039 {
19040 cop0 = force_reg (mode, cop0);
19041
19042 switch (mode)
19043 {
19044 case V4SImode:
19045 case V2DImode:
19046 {
19047 rtx t1, t2, mask;
19048 rtx (*gen_sub3) (rtx, rtx, rtx);
19049
19050 /* Subtract (-(INT MAX) - 1) from both operands to make
19051 them signed. */
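		/* Flipping the sign bit this way maps the unsigned order onto
		   the signed order, so the unsigned GT can be done as a
		   signed GT on the biased operands.  */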
19052 mask = ix86_build_signbit_mask (mode, true, false);
19053 gen_sub3 = (mode == V4SImode
19054 ? gen_subv4si3 : gen_subv2di3);
19055 t1 = gen_reg_rtx (mode);
19056 emit_insn (gen_sub3 (t1, cop0, mask));
19057
19058 t2 = gen_reg_rtx (mode);
19059 emit_insn (gen_sub3 (t2, cop1, mask));
19060
19061 cop0 = t1;
19062 cop1 = t2;
19063 code = GT;
19064 }
19065 break;
19066
19067 case V16QImode:
19068 case V8HImode:
19069 /* Perform a parallel unsigned saturating subtraction. */
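	    /* cop0 -us cop1 is zero exactly when cop0 <= cop1 (unsigned), so
	       comparing the result against zero with EQ and then negating the
	       selection implements GTU.  */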
19070 x = gen_reg_rtx (mode);
19071 emit_insn (gen_rtx_SET (VOIDmode, x,
19072 gen_rtx_US_MINUS (mode, cop0, cop1)));
19073
19074 cop0 = x;
19075 cop1 = CONST0_RTX (mode);
19076 code = EQ;
19077 negate = !negate;
19078 break;
19079
19080 default:
19081 gcc_unreachable ();
19082 }
19083 }
19084 }
19085
19086 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19087 operands[1+negate], operands[2-negate]);
19088
19089 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19090 operands[2-negate]);
19091 return true;
19092 }
19093
19094 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
19095 true if we should do zero extension, else sign extension. HIGH_P is
19096 true if we want the N/2 high elements, else the low elements. */
19097
19098 void
19099 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
19100 {
19101 enum machine_mode imode = GET_MODE (operands[1]);
19102 rtx tmp, dest;
19103
19104 if (TARGET_SSE4_1)
19105 {
19106 rtx (*unpack)(rtx, rtx);
19107
19108 switch (imode)
19109 {
19110 case V16QImode:
19111 if (unsigned_p)
19112 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
19113 else
19114 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
19115 break;
19116 case V8HImode:
19117 if (unsigned_p)
19118 unpack = gen_sse4_1_zero_extendv4hiv4si2;
19119 else
19120 unpack = gen_sse4_1_sign_extendv4hiv4si2;
19121 break;
19122 case V4SImode:
19123 if (unsigned_p)
19124 unpack = gen_sse4_1_zero_extendv2siv2di2;
19125 else
19126 unpack = gen_sse4_1_sign_extendv2siv2di2;
19127 break;
19128 default:
19129 gcc_unreachable ();
19130 }
19131
19132 if (high_p)
19133 {
19134 /* Shift higher 8 bytes to lower 8 bytes. */
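	  /* The SSE4.1 extension patterns read only the low half of their
	     source, so for HIGH_P we first move the high elements down.  */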
19135 tmp = gen_reg_rtx (imode);
19136 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
19137 gen_lowpart (V1TImode, operands[1]),
19138 GEN_INT (64)));
19139 }
19140 else
19141 tmp = operands[1];
19142
19143 emit_insn (unpack (operands[0], tmp));
19144 }
19145 else
19146 {
19147 rtx (*unpack)(rtx, rtx, rtx);
19148
19149 switch (imode)
19150 {
19151 case V16QImode:
19152 if (high_p)
19153 unpack = gen_vec_interleave_highv16qi;
19154 else
19155 unpack = gen_vec_interleave_lowv16qi;
19156 break;
19157 case V8HImode:
19158 if (high_p)
19159 unpack = gen_vec_interleave_highv8hi;
19160 else
19161 unpack = gen_vec_interleave_lowv8hi;
19162 break;
19163 case V4SImode:
19164 if (high_p)
19165 unpack = gen_vec_interleave_highv4si;
19166 else
19167 unpack = gen_vec_interleave_lowv4si;
19168 break;
19169 default:
19170 gcc_unreachable ();
19171 }
19172
19173 dest = gen_lowpart (imode, operands[0]);
19174
19175 if (unsigned_p)
19176 tmp = force_reg (imode, CONST0_RTX (imode));
19177 else
19178 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
19179 operands[1], pc_rtx, pc_rtx);
19180
19181 emit_insn (unpack (dest, operands[1], tmp));
19182 }
19183 }
19184
19185 /* Expand conditional increment or decrement using adc/sbb instructions.
19186 The default case using setcc followed by the conditional move can be
19187 done by generic code. */
19188 bool
19189 ix86_expand_int_addcc (rtx operands[])
19190 {
19191 enum rtx_code code = GET_CODE (operands[1]);
19192 rtx flags;
19193 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
19194 rtx compare_op;
19195 rtx val = const0_rtx;
19196 bool fpcmp = false;
19197 enum machine_mode mode;
19198 rtx op0 = XEXP (operands[1], 0);
19199 rtx op1 = XEXP (operands[1], 1);
19200
19201 if (operands[3] != const1_rtx
19202 && operands[3] != constm1_rtx)
19203 return false;
19204 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19205 return false;
19206 code = GET_CODE (compare_op);
19207
19208 flags = XEXP (compare_op, 0);
19209
19210 if (GET_MODE (flags) == CCFPmode
19211 || GET_MODE (flags) == CCFPUmode)
19212 {
19213 fpcmp = true;
19214 code = ix86_fp_compare_code_to_integer (code);
19215 }
19216
19217 if (code != LTU)
19218 {
19219 val = constm1_rtx;
19220 if (fpcmp)
19221 PUT_CODE (compare_op,
19222 reverse_condition_maybe_unordered
19223 (GET_CODE (compare_op)));
19224 else
19225 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
19226 }
19227
19228 mode = GET_MODE (operands[0]);
19229
19230 /* Construct either adc or sbb insn. */
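  /* The emitted adc computes operands[0] = operands[2] + VAL + carry, and
     the sbb computes operands[2] - VAL - carry; with VAL either 0 or -1
     this yields the conditional increment or decrement.  */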
19231 if ((code == LTU) == (operands[3] == constm1_rtx))
19232 {
19233 switch (mode)
19234 {
19235 case QImode:
19236 insn = gen_subqi3_carry;
19237 break;
19238 case HImode:
19239 insn = gen_subhi3_carry;
19240 break;
19241 case SImode:
19242 insn = gen_subsi3_carry;
19243 break;
19244 case DImode:
19245 insn = gen_subdi3_carry;
19246 break;
19247 default:
19248 gcc_unreachable ();
19249 }
19250 }
19251 else
19252 {
19253 switch (mode)
19254 {
19255 case QImode:
19256 insn = gen_addqi3_carry;
19257 break;
19258 case HImode:
19259 insn = gen_addhi3_carry;
19260 break;
19261 case SImode:
19262 insn = gen_addsi3_carry;
19263 break;
19264 case DImode:
19265 insn = gen_adddi3_carry;
19266 break;
19267 default:
19268 gcc_unreachable ();
19269 }
19270 }
19271 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
19272
19273 return true;
19274 }
19275
19276
19277 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
19278 but works for floating point parameters and non-offsettable memories.
19279 For pushes, it returns just stack offsets; the values will be saved
19280 in the right order. Maximally four parts are generated. */
19281
19282 static int
19283 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
19284 {
19285 int size;
19286
19287 if (!TARGET_64BIT)
19288 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
19289 else
19290 size = (GET_MODE_SIZE (mode) + 4) / 8;
19291
19292 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
19293 gcc_assert (size >= 2 && size <= 4);
19294
19295 /* Optimize constant pool reference to immediates. This is used by fp
19296 moves, that force all constants to memory to allow combining. */
19297 if (MEM_P (operand) && MEM_READONLY_P (operand))
19298 {
19299 rtx tmp = maybe_get_pool_constant (operand);
19300 if (tmp)
19301 operand = tmp;
19302 }
19303
19304 if (MEM_P (operand) && !offsettable_memref_p (operand))
19305 {
19306 /* The only non-offsettable memories we handle are pushes. */
19307 int ok = push_operand (operand, VOIDmode);
19308
19309 gcc_assert (ok);
19310
19311 operand = copy_rtx (operand);
19312 PUT_MODE (operand, Pmode);
19313 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19314 return size;
19315 }
19316
19317 if (GET_CODE (operand) == CONST_VECTOR)
19318 {
19319 enum machine_mode imode = int_mode_for_mode (mode);
19320 /* Caution: if we looked through a constant pool memory above,
19321 the operand may actually have a different mode now. That's
19322 ok, since we want to pun this all the way back to an integer. */
19323 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19324 gcc_assert (operand != NULL);
19325 mode = imode;
19326 }
19327
19328 if (!TARGET_64BIT)
19329 {
19330 if (mode == DImode)
19331 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19332 else
19333 {
19334 int i;
19335
19336 if (REG_P (operand))
19337 {
19338 gcc_assert (reload_completed);
19339 for (i = 0; i < size; i++)
19340 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19341 }
19342 else if (offsettable_memref_p (operand))
19343 {
19344 operand = adjust_address (operand, SImode, 0);
19345 parts[0] = operand;
19346 for (i = 1; i < size; i++)
19347 parts[i] = adjust_address (operand, SImode, 4 * i);
19348 }
19349 else if (GET_CODE (operand) == CONST_DOUBLE)
19350 {
19351 REAL_VALUE_TYPE r;
19352 long l[4];
19353
19354 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19355 switch (mode)
19356 {
19357 case TFmode:
19358 real_to_target (l, &r, mode);
19359 parts[3] = gen_int_mode (l[3], SImode);
19360 parts[2] = gen_int_mode (l[2], SImode);
19361 break;
19362 case XFmode:
19363 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19364 parts[2] = gen_int_mode (l[2], SImode);
19365 break;
19366 case DFmode:
19367 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19368 break;
19369 default:
19370 gcc_unreachable ();
19371 }
19372 parts[1] = gen_int_mode (l[1], SImode);
19373 parts[0] = gen_int_mode (l[0], SImode);
19374 }
19375 else
19376 gcc_unreachable ();
19377 }
19378 }
19379 else
19380 {
19381 if (mode == TImode)
19382 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19383 if (mode == XFmode || mode == TFmode)
19384 {
19385 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
19386 if (REG_P (operand))
19387 {
19388 gcc_assert (reload_completed);
19389 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19390 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19391 }
19392 else if (offsettable_memref_p (operand))
19393 {
19394 operand = adjust_address (operand, DImode, 0);
19395 parts[0] = operand;
19396 parts[1] = adjust_address (operand, upper_mode, 8);
19397 }
19398 else if (GET_CODE (operand) == CONST_DOUBLE)
19399 {
19400 REAL_VALUE_TYPE r;
19401 long l[4];
19402
19403 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19404 real_to_target (l, &r, mode);
19405
19406 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19407 if (HOST_BITS_PER_WIDE_INT >= 64)
19408 parts[0]
19409 = gen_int_mode
19410 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19411 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19412 DImode);
19413 else
19414 parts[0] = immed_double_const (l[0], l[1], DImode);
19415
19416 if (upper_mode == SImode)
19417 parts[1] = gen_int_mode (l[2], SImode);
19418 else if (HOST_BITS_PER_WIDE_INT >= 64)
19419 parts[1]
19420 = gen_int_mode
19421 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19422 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19423 DImode);
19424 else
19425 parts[1] = immed_double_const (l[2], l[3], DImode);
19426 }
19427 else
19428 gcc_unreachable ();
19429 }
19430 }
19431
19432 return size;
19433 }
19434
19435 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19436 The value is split into half-mode parts; the destination parts are
19437 placed in operands 2-5 and the source parts in operands 6-9, and the
19438 moves are emitted in the correct order. */
19439
19440 void
19441 ix86_split_long_move (rtx operands[])
19442 {
19443 rtx part[2][4];
19444 int nparts, i, j;
19445 int push = 0;
19446 int collisions = 0;
19447 enum machine_mode mode = GET_MODE (operands[0]);
19448 bool collisionparts[4];
19449
19450 /* The DFmode expanders may ask us to move a double.
19451 For a 64-bit target this is a single move. By hiding that fact
19452 here we simplify the i386.md splitters. */
19453 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19454 {
19455 /* Optimize constant pool reference to immediates. This is used by
19456 fp moves, that force all constants to memory to allow combining. */
19457
19458 if (MEM_P (operands[1])
19459 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19460 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19461 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19462 if (push_operand (operands[0], VOIDmode))
19463 {
19464 operands[0] = copy_rtx (operands[0]);
19465 PUT_MODE (operands[0], Pmode);
19466 }
19467 else
19468 operands[0] = gen_lowpart (DImode, operands[0]);
19469 operands[1] = gen_lowpart (DImode, operands[1]);
19470 emit_move_insn (operands[0], operands[1]);
19471 return;
19472 }
19473
19474 /* The only non-offsettable memory we handle is push. */
19475 if (push_operand (operands[0], VOIDmode))
19476 push = 1;
19477 else
19478 gcc_assert (!MEM_P (operands[0])
19479 || offsettable_memref_p (operands[0]));
19480
19481 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19482 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19483
19484 /* When emitting a push, take care with source operands on the stack. */
19485 if (push && MEM_P (operands[1])
19486 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19487 {
19488 rtx src_base = XEXP (part[1][nparts - 1], 0);
19489
19490 /* Compensate for the stack decrement by 4. */
19491 if (!TARGET_64BIT && nparts == 3
19492 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19493 src_base = plus_constant (src_base, 4);
19494
19495 /* src_base refers to the stack pointer and is
19496 automatically decreased by emitted push. */
19497 for (i = 0; i < nparts; i++)
19498 part[1][i] = change_address (part[1][i],
19499 GET_MODE (part[1][i]), src_base);
19500 }
19501
19502 /* We need to do copy in the right order in case an address register
19503 of the source overlaps the destination. */
19504 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19505 {
19506 rtx tmp;
19507
19508 for (i = 0; i < nparts; i++)
19509 {
19510 collisionparts[i]
19511 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19512 if (collisionparts[i])
19513 collisions++;
19514 }
19515
19516 /* Collision in the middle part can be handled by reordering. */
19517 if (collisions == 1 && nparts == 3 && collisionparts [1])
19518 {
19519 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19520 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19521 }
19522 else if (collisions == 1
19523 && nparts == 4
19524 && (collisionparts [1] || collisionparts [2]))
19525 {
19526 if (collisionparts [1])
19527 {
19528 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19529 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19530 }
19531 else
19532 {
19533 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19534 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19535 }
19536 }
19537
19538 /* If there are more collisions, we can't handle it by reordering.
19539 Do an lea to the last part and use only one colliding move. */
19540 else if (collisions > 1)
19541 {
19542 rtx base;
19543
19544 collisions = 1;
19545
19546 base = part[0][nparts - 1];
19547
19548 /* Handle the case when the last part isn't valid for lea.
19549 Happens in 64-bit mode storing the 12-byte XFmode. */
19550 if (GET_MODE (base) != Pmode)
19551 base = gen_rtx_REG (Pmode, REGNO (base));
19552
19553 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19554 part[1][0] = replace_equiv_address (part[1][0], base);
19555 for (i = 1; i < nparts; i++)
19556 {
19557 tmp = plus_constant (base, UNITS_PER_WORD * i);
19558 part[1][i] = replace_equiv_address (part[1][i], tmp);
19559 }
19560 }
19561 }
19562
19563 if (push)
19564 {
19565 if (!TARGET_64BIT)
19566 {
19567 if (nparts == 3)
19568 {
19569 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19570 emit_insn (gen_addsi3 (stack_pointer_rtx,
19571 stack_pointer_rtx, GEN_INT (-4)));
19572 emit_move_insn (part[0][2], part[1][2]);
19573 }
19574 else if (nparts == 4)
19575 {
19576 emit_move_insn (part[0][3], part[1][3]);
19577 emit_move_insn (part[0][2], part[1][2]);
19578 }
19579 }
19580 else
19581 {
19582 /* In 64bit mode we don't have a 32bit push available. In case this is
19583 a register, it is OK - we will just use the larger counterpart. We also
19584 retype the memory - this comes from an attempt to avoid the REX prefix
19585 on moving the second half of a TFmode value. */
19586 if (GET_MODE (part[1][1]) == SImode)
19587 {
19588 switch (GET_CODE (part[1][1]))
19589 {
19590 case MEM:
19591 part[1][1] = adjust_address (part[1][1], DImode, 0);
19592 break;
19593
19594 case REG:
19595 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19596 break;
19597
19598 default:
19599 gcc_unreachable ();
19600 }
19601
19602 if (GET_MODE (part[1][0]) == SImode)
19603 part[1][0] = part[1][1];
19604 }
19605 }
19606 emit_move_insn (part[0][1], part[1][1]);
19607 emit_move_insn (part[0][0], part[1][0]);
19608 return;
19609 }
19610
19611 /* Choose the correct order so as not to overwrite the source before it is copied. */
19612 if ((REG_P (part[0][0])
19613 && REG_P (part[1][1])
19614 && (REGNO (part[0][0]) == REGNO (part[1][1])
19615 || (nparts == 3
19616 && REGNO (part[0][0]) == REGNO (part[1][2]))
19617 || (nparts == 4
19618 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19619 || (collisions > 0
19620 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19621 {
19622 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19623 {
19624 operands[2 + i] = part[0][j];
19625 operands[6 + i] = part[1][j];
19626 }
19627 }
19628 else
19629 {
19630 for (i = 0; i < nparts; i++)
19631 {
19632 operands[2 + i] = part[0][i];
19633 operands[6 + i] = part[1][i];
19634 }
19635 }
19636
19637 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19638 if (optimize_insn_for_size_p ())
19639 {
19640 for (j = 0; j < nparts - 1; j++)
19641 if (CONST_INT_P (operands[6 + j])
19642 && operands[6 + j] != const0_rtx
19643 && REG_P (operands[2 + j]))
19644 for (i = j; i < nparts - 1; i++)
19645 if (CONST_INT_P (operands[7 + i])
19646 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19647 operands[7 + i] = operands[2 + j];
19648 }
19649
19650 for (i = 0; i < nparts; i++)
19651 emit_move_insn (operands[2 + i], operands[6 + i]);
19652
19653 return;
19654 }
19655
19656 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19657 left shift by a constant, either using a single shift or
19658 a sequence of add instructions. */
19659
19660 static void
19661 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19662 {
19663 rtx (*insn)(rtx, rtx, rtx);
19664
19665 if (count == 1
19666 || (count * ix86_cost->add <= ix86_cost->shift_const
19667 && !optimize_insn_for_size_p ()))
19668 {
19669 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19670 while (count-- > 0)
19671 emit_insn (insn (operand, operand, operand));
19672 }
19673 else
19674 {
19675 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19676 emit_insn (insn (operand, operand, GEN_INT (count)));
19677 }
19678 }
19679
19680 void
19681 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19682 {
19683 rtx (*gen_ashl3)(rtx, rtx, rtx);
19684 rtx (*gen_shld)(rtx, rtx, rtx);
19685 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19686
19687 rtx low[2], high[2];
19688 int count;
19689
19690 if (CONST_INT_P (operands[2]))
19691 {
19692 split_double_mode (mode, operands, 2, low, high);
19693 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19694
19695 if (count >= half_width)
19696 {
19697 emit_move_insn (high[0], low[1]);
19698 emit_move_insn (low[0], const0_rtx);
19699
19700 if (count > half_width)
19701 ix86_expand_ashl_const (high[0], count - half_width, mode);
19702 }
19703 else
19704 {
19705 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19706
19707 if (!rtx_equal_p (operands[0], operands[1]))
19708 emit_move_insn (operands[0], operands[1]);
19709
19710 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19711 ix86_expand_ashl_const (low[0], count, mode);
19712 }
19713 return;
19714 }
19715
19716 split_double_mode (mode, operands, 1, low, high);
19717
19718 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19719
19720 if (operands[1] == const1_rtx)
19721 {
19722 /* Assuming we've chosen QImode-capable registers, 1 << N
19723 can be done with two 32/64-bit shifts, no branches, no cmoves. */
19724 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19725 {
19726 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19727
19728 ix86_expand_clear (low[0]);
19729 ix86_expand_clear (high[0]);
19730 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19731
19732 d = gen_lowpart (QImode, low[0]);
19733 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19734 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19735 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19736
19737 d = gen_lowpart (QImode, high[0]);
19738 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19739 s = gen_rtx_NE (QImode, flags, const0_rtx);
19740 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19741 }
19742
19743 /* Otherwise, we can get the same results by manually performing
19744 a bit extract operation on bit 5/6, and then performing the two
19745 shifts. The two methods of getting 0/1 into low/high are exactly
19746 the same size. Avoiding the shift in the bit extract case helps
19747 pentium4 a bit; no one else seems to care much either way. */
19748 else
19749 {
19750 enum machine_mode half_mode;
19751 rtx (*gen_lshr3)(rtx, rtx, rtx);
19752 rtx (*gen_and3)(rtx, rtx, rtx);
19753 rtx (*gen_xor3)(rtx, rtx, rtx);
19754 HOST_WIDE_INT bits;
19755 rtx x;
19756
19757 if (mode == DImode)
19758 {
19759 half_mode = SImode;
19760 gen_lshr3 = gen_lshrsi3;
19761 gen_and3 = gen_andsi3;
19762 gen_xor3 = gen_xorsi3;
19763 bits = 5;
19764 }
19765 else
19766 {
19767 half_mode = DImode;
19768 gen_lshr3 = gen_lshrdi3;
19769 gen_and3 = gen_anddi3;
19770 gen_xor3 = gen_xordi3;
19771 bits = 6;
19772 }
19773
19774 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19775 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19776 else
19777 x = gen_lowpart (half_mode, operands[2]);
19778 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19779
19780 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19781 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19782 emit_move_insn (low[0], high[0]);
19783 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19784 }
19785
19786 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19787 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19788 return;
19789 }
19790
19791 if (operands[1] == constm1_rtx)
19792 {
19793 /* For -1 << N, we can avoid the shld instruction, because we
19794 know that we're shifting 0...31/63 ones into a -1. */
19795 emit_move_insn (low[0], constm1_rtx);
19796 if (optimize_insn_for_size_p ())
19797 emit_move_insn (high[0], low[0]);
19798 else
19799 emit_move_insn (high[0], constm1_rtx);
19800 }
19801 else
19802 {
19803 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19804
19805 if (!rtx_equal_p (operands[0], operands[1]))
19806 emit_move_insn (operands[0], operands[1]);
19807
19808 split_double_mode (mode, operands, 1, low, high);
19809 emit_insn (gen_shld (high[0], low[0], operands[2]));
19810 }
19811
19812 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19813
19814 if (TARGET_CMOVE && scratch)
19815 {
19816 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19817 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19818
19819 ix86_expand_clear (scratch);
19820 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19821 }
19822 else
19823 {
19824 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19825 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19826
19827 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19828 }
19829 }
19830
19831 void
19832 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19833 {
19834 rtx (*gen_ashr3)(rtx, rtx, rtx)
19835 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19836 rtx (*gen_shrd)(rtx, rtx, rtx);
19837 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19838
19839 rtx low[2], high[2];
19840 int count;
19841
19842 if (CONST_INT_P (operands[2]))
19843 {
19844 split_double_mode (mode, operands, 2, low, high);
19845 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19846
19847 if (count == GET_MODE_BITSIZE (mode) - 1)
19848 {
19849 emit_move_insn (high[0], high[1]);
19850 emit_insn (gen_ashr3 (high[0], high[0],
19851 GEN_INT (half_width - 1)));
19852 emit_move_insn (low[0], high[0]);
19853
19854 }
19855 else if (count >= half_width)
19856 {
19857 emit_move_insn (low[0], high[1]);
19858 emit_move_insn (high[0], low[0]);
19859 emit_insn (gen_ashr3 (high[0], high[0],
19860 GEN_INT (half_width - 1)));
19861
19862 if (count > half_width)
19863 emit_insn (gen_ashr3 (low[0], low[0],
19864 GEN_INT (count - half_width)));
19865 }
19866 else
19867 {
19868 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19869
19870 if (!rtx_equal_p (operands[0], operands[1]))
19871 emit_move_insn (operands[0], operands[1]);
19872
19873 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19874 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19875 }
19876 }
19877 else
19878 {
19879 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19880
19881 if (!rtx_equal_p (operands[0], operands[1]))
19882 emit_move_insn (operands[0], operands[1]);
19883
19884 split_double_mode (mode, operands, 1, low, high);
19885
19886 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19887 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19888
19889 if (TARGET_CMOVE && scratch)
19890 {
19891 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19892 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19893
19894 emit_move_insn (scratch, high[0]);
19895 emit_insn (gen_ashr3 (scratch, scratch,
19896 GEN_INT (half_width - 1)));
19897 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19898 scratch));
19899 }
19900 else
19901 {
19902 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19903 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19904
19905 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19906 }
19907 }
19908 }
19909
19910 void
19911 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19912 {
19913 rtx (*gen_lshr3)(rtx, rtx, rtx)
19914 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19915 rtx (*gen_shrd)(rtx, rtx, rtx);
19916 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19917
19918 rtx low[2], high[2];
19919 int count;
19920
19921 if (CONST_INT_P (operands[2]))
19922 {
19923 split_double_mode (mode, operands, 2, low, high);
19924 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19925
19926 if (count >= half_width)
19927 {
19928 emit_move_insn (low[0], high[1]);
19929 ix86_expand_clear (high[0]);
19930
19931 if (count > half_width)
19932 emit_insn (gen_lshr3 (low[0], low[0],
19933 GEN_INT (count - half_width)));
19934 }
19935 else
19936 {
19937 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19938
19939 if (!rtx_equal_p (operands[0], operands[1]))
19940 emit_move_insn (operands[0], operands[1]);
19941
19942 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19943 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19944 }
19945 }
19946 else
19947 {
19948 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19949
19950 if (!rtx_equal_p (operands[0], operands[1]))
19951 emit_move_insn (operands[0], operands[1]);
19952
19953 split_double_mode (mode, operands, 1, low, high);
19954
19955 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19956 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19957
19958 if (TARGET_CMOVE && scratch)
19959 {
19960 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19961 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19962
19963 ix86_expand_clear (scratch);
19964 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19965 scratch));
19966 }
19967 else
19968 {
19969 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19970 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19971
19972 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19973 }
19974 }
19975 }
19976
19977 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19978 static void
19979 predict_jump (int prob)
19980 {
19981 rtx insn = get_last_insn ();
19982 gcc_assert (JUMP_P (insn));
19983 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19984 }
19985
19986 /* Helper function for the string operations below. Test whether VARIABLE
19987 is aligned to VALUE bytes; if so, jump to the returned label. */
19988 static rtx
19989 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19990 {
19991 rtx label = gen_label_rtx ();
19992 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19993 if (GET_MODE (variable) == DImode)
19994 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19995 else
19996 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19997 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19998 1, label);
19999 if (epilogue)
20000 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20001 else
20002 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20003 return label;
20004 }
20005
20006 /* Decrease COUNTREG by VALUE. */
20007 static void
20008 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
20009 {
20010 rtx (*gen_add)(rtx, rtx, rtx)
20011 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
20012
20013 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
20014 }
20015
20016 /* Zero extend possibly SImode EXP to Pmode register. */
20017 rtx
20018 ix86_zero_extend_to_Pmode (rtx exp)
20019 {
20020 rtx r;
20021 if (GET_MODE (exp) == VOIDmode)
20022 return force_reg (Pmode, exp);
20023 if (GET_MODE (exp) == Pmode)
20024 return copy_to_mode_reg (Pmode, exp);
20025 r = gen_reg_rtx (Pmode);
20026 emit_insn (gen_zero_extendsidi2 (r, exp));
20027 return r;
20028 }
20029
20030 /* Divide COUNTREG by SCALE. */
20031 static rtx
20032 scale_counter (rtx countreg, int scale)
20033 {
20034 rtx sc;
20035
20036 if (scale == 1)
20037 return countreg;
20038 if (CONST_INT_P (countreg))
20039 return GEN_INT (INTVAL (countreg) / scale);
20040 gcc_assert (REG_P (countreg));
20041
20042 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
20043 GEN_INT (exact_log2 (scale)),
20044 NULL, 1, OPTAB_DIRECT);
20045 return sc;
20046 }
20047
20048 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
20049 DImode for constant loop counts. */
20050
20051 static enum machine_mode
20052 counter_mode (rtx count_exp)
20053 {
20054 if (GET_MODE (count_exp) != VOIDmode)
20055 return GET_MODE (count_exp);
20056 if (!CONST_INT_P (count_exp))
20057 return Pmode;
20058 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
20059 return DImode;
20060 return SImode;
20061 }
20062
20063 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
20064 to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
20065 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
20066 the equivalent loop to set memory to VALUE (supposed to be in MODE).
20067
20068 The size is rounded down to a whole number of chunks moved at once.
20069 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
20070
20071
20072 static void
20073 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
20074 rtx destptr, rtx srcptr, rtx value,
20075 rtx count, enum machine_mode mode, int unroll,
20076 int expected_size)
20077 {
20078 rtx out_label, top_label, iter, tmp;
20079 enum machine_mode iter_mode = counter_mode (count);
20080 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
20081 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
20082 rtx size;
20083 rtx x_addr;
20084 rtx y_addr;
20085 int i;
20086
20087 top_label = gen_label_rtx ();
20088 out_label = gen_label_rtx ();
20089 iter = gen_reg_rtx (iter_mode);
20090
20091 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
20092 NULL, 1, OPTAB_DIRECT);
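  /* E.g. for SImode chunks unrolled four times, piece_size is 16 and
     size = count & ~15, i.e. the number of bytes the loop itself handles.  */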
20093 /* Those two should combine. */
20094 if (piece_size == const1_rtx)
20095 {
20096 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
20097 true, out_label);
20098 predict_jump (REG_BR_PROB_BASE * 10 / 100);
20099 }
20100 emit_move_insn (iter, const0_rtx);
20101
20102 emit_label (top_label);
20103
20104 tmp = convert_modes (Pmode, iter_mode, iter, true);
20105 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
20106 destmem = change_address (destmem, mode, x_addr);
20107
20108 if (srcmem)
20109 {
20110 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
20111 srcmem = change_address (srcmem, mode, y_addr);
20112
20113 /* When unrolling for chips that reorder memory reads and writes,
20114 we can save registers by using a single temporary.
20115 Also, using 4 temporaries is overkill in 32bit mode. */
20116 if (!TARGET_64BIT && 0)
20117 {
20118 for (i = 0; i < unroll; i++)
20119 {
20120 if (i)
20121 {
20122 destmem =
20123 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20124 srcmem =
20125 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20126 }
20127 emit_move_insn (destmem, srcmem);
20128 }
20129 }
20130 else
20131 {
20132 rtx tmpreg[4];
20133 gcc_assert (unroll <= 4);
20134 for (i = 0; i < unroll; i++)
20135 {
20136 tmpreg[i] = gen_reg_rtx (mode);
20137 if (i)
20138 {
20139 srcmem =
20140 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20141 }
20142 emit_move_insn (tmpreg[i], srcmem);
20143 }
20144 for (i = 0; i < unroll; i++)
20145 {
20146 if (i)
20147 {
20148 destmem =
20149 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20150 }
20151 emit_move_insn (destmem, tmpreg[i]);
20152 }
20153 }
20154 }
20155 else
20156 for (i = 0; i < unroll; i++)
20157 {
20158 if (i)
20159 destmem =
20160 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20161 emit_move_insn (destmem, value);
20162 }
20163
20164 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
20165 true, OPTAB_LIB_WIDEN);
20166 if (tmp != iter)
20167 emit_move_insn (iter, tmp);
20168
20169 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
20170 true, top_label);
20171 if (expected_size != -1)
20172 {
20173 expected_size /= GET_MODE_SIZE (mode) * unroll;
20174 if (expected_size == 0)
20175 predict_jump (0);
20176 else if (expected_size > REG_BR_PROB_BASE)
20177 predict_jump (REG_BR_PROB_BASE - 1);
20178 else
20179 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
20180 }
20181 else
20182 predict_jump (REG_BR_PROB_BASE * 80 / 100);
20183 iter = ix86_zero_extend_to_Pmode (iter);
20184 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
20185 true, OPTAB_LIB_WIDEN);
20186 if (tmp != destptr)
20187 emit_move_insn (destptr, tmp);
20188 if (srcptr)
20189 {
20190 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
20191 true, OPTAB_LIB_WIDEN);
20192 if (tmp != srcptr)
20193 emit_move_insn (srcptr, tmp);
20194 }
20195 emit_label (out_label);
20196 }
20197
20198 /* Output a "rep; mov" instruction.
20199 Arguments have the same meaning as for the previous function. */
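/* For example (schematically, not the literal instruction stream), with
   MODE == SImode and a byte count N this ends up emitting roughly:

	countreg = N >> 2;
	rep movs{l,d}

   where DESTEXP/SRCEXP describe the final pointer values
   destptr + (countreg << 2) and srcptr + (countreg << 2) for the
   benefit of the rep_mov pattern.  */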
20200 static void
20201 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
20202 rtx destptr, rtx srcptr,
20203 rtx count,
20204 enum machine_mode mode)
20205 {
20206 rtx destexp;
20207 rtx srcexp;
20208 rtx countreg;
20209
20210 /* If the size is known to be a multiple of 4, use SImode rather than
20211 if (mode == QImode && CONST_INT_P (count)
20212 && !(INTVAL (count) & 3))
20213 mode = SImode;
20214
20215 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20216 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20217 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
20218 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
20219 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20220 if (mode != QImode)
20221 {
20222 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20223 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20224 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20225 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
20226 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20227 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
20228 }
20229 else
20230 {
20231 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20232 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
20233 }
20234 if (CONST_INT_P (count))
20235 {
20236 count = GEN_INT (INTVAL (count)
20237 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20238 destmem = shallow_copy_rtx (destmem);
20239 srcmem = shallow_copy_rtx (srcmem);
20240 set_mem_size (destmem, count);
20241 set_mem_size (srcmem, count);
20242 }
20243 else
20244 {
20245 if (MEM_SIZE (destmem))
20246 set_mem_size (destmem, NULL_RTX);
20247 if (MEM_SIZE (srcmem))
20248 set_mem_size (srcmem, NULL_RTX);
20249 }
20250 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
20251 destexp, srcexp));
20252 }
20253
20254 /* Output a "rep; stos" instruction.
20255 Arguments have the same meaning as for the previous function. */
20256 static void
20257 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
20258 rtx count, enum machine_mode mode,
20259 rtx orig_value)
20260 {
20261 rtx destexp;
20262 rtx countreg;
20263
20264 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20265 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20266 value = force_reg (mode, gen_lowpart (mode, value));
20267 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20268 if (mode != QImode)
20269 {
20270 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20271 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20272 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20273 }
20274 else
20275 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20276 if (orig_value == const0_rtx && CONST_INT_P (count))
20277 {
20278 count = GEN_INT (INTVAL (count)
20279 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20280 destmem = shallow_copy_rtx (destmem);
20281 set_mem_size (destmem, count);
20282 }
20283 else if (MEM_SIZE (destmem))
20284 set_mem_size (destmem, NULL_RTX);
20285 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
20286 }
20287
20288 static void
20289 emit_strmov (rtx destmem, rtx srcmem,
20290 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
20291 {
20292 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
20293 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
20294 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20295 }
20296
20297 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
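/* For instance, when the count is a known constant whose low bits are 0x7
   and max_size allows it, the constant branch below emits a 4-byte, a
   2-byte and a 1-byte move at increasing offsets instead of any loop
   (a sketch; the exact moves depend on TARGET_64BIT and max_size).  */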
20298 static void
20299 expand_movmem_epilogue (rtx destmem, rtx srcmem,
20300 rtx destptr, rtx srcptr, rtx count, int max_size)
20301 {
20302 rtx src, dest;
20303 if (CONST_INT_P (count))
20304 {
20305 HOST_WIDE_INT countval = INTVAL (count);
20306 int offset = 0;
20307
20308 if ((countval & 0x10) && max_size > 16)
20309 {
20310 if (TARGET_64BIT)
20311 {
20312 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20313 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20314 }
20315 else
20316 gcc_unreachable ();
20317 offset += 16;
20318 }
20319 if ((countval & 0x08) && max_size > 8)
20320 {
20321 if (TARGET_64BIT)
20322 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20323 else
20324 {
20325 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20326 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20327 }
20328 offset += 8;
20329 }
20330 if ((countval & 0x04) && max_size > 4)
20331 {
20332 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20333 offset += 4;
20334 }
20335 if ((countval & 0x02) && max_size > 2)
20336 {
20337 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20338 offset += 2;
20339 }
20340 if ((countval & 0x01) && max_size > 1)
20341 {
20342 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
20343 offset += 1;
20344 }
20345 return;
20346 }
20347 if (max_size > 8)
20348 {
20349 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20350 count, 1, OPTAB_DIRECT);
20351 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20352 count, QImode, 1, 4);
20353 return;
20354 }
20355
20356 /* When single string operations are cheap, we can bump the dest and src
20357 pointers directly. Otherwise we save code size by maintaining an offset
20358 (zero is readily available from the preceding rep operation) and using
20359 x86 addressing modes. */
20360 if (TARGET_SINGLE_STRINGOP)
20361 {
20362 if (max_size > 4)
20363 {
20364 rtx label = ix86_expand_aligntest (count, 4, true);
20365 src = change_address (srcmem, SImode, srcptr);
20366 dest = change_address (destmem, SImode, destptr);
20367 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20368 emit_label (label);
20369 LABEL_NUSES (label) = 1;
20370 }
20371 if (max_size > 2)
20372 {
20373 rtx label = ix86_expand_aligntest (count, 2, true);
20374 src = change_address (srcmem, HImode, srcptr);
20375 dest = change_address (destmem, HImode, destptr);
20376 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20377 emit_label (label);
20378 LABEL_NUSES (label) = 1;
20379 }
20380 if (max_size > 1)
20381 {
20382 rtx label = ix86_expand_aligntest (count, 1, true);
20383 src = change_address (srcmem, QImode, srcptr);
20384 dest = change_address (destmem, QImode, destptr);
20385 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20386 emit_label (label);
20387 LABEL_NUSES (label) = 1;
20388 }
20389 }
20390 else
20391 {
20392 rtx offset = force_reg (Pmode, const0_rtx);
20393 rtx tmp;
20394
20395 if (max_size > 4)
20396 {
20397 rtx label = ix86_expand_aligntest (count, 4, true);
20398 src = change_address (srcmem, SImode, srcptr);
20399 dest = change_address (destmem, SImode, destptr);
20400 emit_move_insn (dest, src);
20401 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20402 true, OPTAB_LIB_WIDEN);
20403 if (tmp != offset)
20404 emit_move_insn (offset, tmp);
20405 emit_label (label);
20406 LABEL_NUSES (label) = 1;
20407 }
20408 if (max_size > 2)
20409 {
20410 rtx label = ix86_expand_aligntest (count, 2, true);
20411 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20412 src = change_address (srcmem, HImode, tmp);
20413 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20414 dest = change_address (destmem, HImode, tmp);
20415 emit_move_insn (dest, src);
20416 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20417 true, OPTAB_LIB_WIDEN);
20418 if (tmp != offset)
20419 emit_move_insn (offset, tmp);
20420 emit_label (label);
20421 LABEL_NUSES (label) = 1;
20422 }
20423 if (max_size > 1)
20424 {
20425 rtx label = ix86_expand_aligntest (count, 1, true);
20426 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20427 src = change_address (srcmem, QImode, tmp);
20428 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20429 dest = change_address (destmem, QImode, tmp);
20430 emit_move_insn (dest, src);
20431 emit_label (label);
20432 LABEL_NUSES (label) = 1;
20433 }
20434 }
20435 }
20436
20437 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20438 static void
20439 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20440 rtx count, int max_size)
20441 {
20442 count =
20443 expand_simple_binop (counter_mode (count), AND, count,
20444 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20445 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20446 gen_lowpart (QImode, value), count, QImode,
20447 1, max_size / 2);
20448 }
20449
20450 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20451 static void
20452 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20453 {
20454 rtx dest;
20455
20456 if (CONST_INT_P (count))
20457 {
20458 HOST_WIDE_INT countval = INTVAL (count);
20459 int offset = 0;
20460
20461 if ((countval & 0x10) && max_size > 16)
20462 {
20463 if (TARGET_64BIT)
20464 {
20465 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20466 emit_insn (gen_strset (destptr, dest, value));
20467 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20468 emit_insn (gen_strset (destptr, dest, value));
20469 }
20470 else
20471 gcc_unreachable ();
20472 offset += 16;
20473 }
20474 if ((countval & 0x08) && max_size > 8)
20475 {
20476 if (TARGET_64BIT)
20477 {
20478 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20479 emit_insn (gen_strset (destptr, dest, value));
20480 }
20481 else
20482 {
20483 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20484 emit_insn (gen_strset (destptr, dest, value));
20485 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20486 emit_insn (gen_strset (destptr, dest, value));
20487 }
20488 offset += 8;
20489 }
20490 if ((countval & 0x04) && max_size > 4)
20491 {
20492 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20493 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20494 offset += 4;
20495 }
20496 if ((countval & 0x02) && max_size > 2)
20497 {
20498 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20499 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20500 offset += 2;
20501 }
20502 if ((countval & 0x01) && max_size > 1)
20503 {
20504 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20505 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20506 offset += 1;
20507 }
20508 return;
20509 }
20510 if (max_size > 32)
20511 {
20512 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20513 return;
20514 }
20515 if (max_size > 16)
20516 {
20517 rtx label = ix86_expand_aligntest (count, 16, true);
20518 if (TARGET_64BIT)
20519 {
20520 dest = change_address (destmem, DImode, destptr);
20521 emit_insn (gen_strset (destptr, dest, value));
20522 emit_insn (gen_strset (destptr, dest, value));
20523 }
20524 else
20525 {
20526 dest = change_address (destmem, SImode, destptr);
20527 emit_insn (gen_strset (destptr, dest, value));
20528 emit_insn (gen_strset (destptr, dest, value));
20529 emit_insn (gen_strset (destptr, dest, value));
20530 emit_insn (gen_strset (destptr, dest, value));
20531 }
20532 emit_label (label);
20533 LABEL_NUSES (label) = 1;
20534 }
20535 if (max_size > 8)
20536 {
20537 rtx label = ix86_expand_aligntest (count, 8, true);
20538 if (TARGET_64BIT)
20539 {
20540 dest = change_address (destmem, DImode, destptr);
20541 emit_insn (gen_strset (destptr, dest, value));
20542 }
20543 else
20544 {
20545 dest = change_address (destmem, SImode, destptr);
20546 emit_insn (gen_strset (destptr, dest, value));
20547 emit_insn (gen_strset (destptr, dest, value));
20548 }
20549 emit_label (label);
20550 LABEL_NUSES (label) = 1;
20551 }
20552 if (max_size > 4)
20553 {
20554 rtx label = ix86_expand_aligntest (count, 4, true);
20555 dest = change_address (destmem, SImode, destptr);
20556 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20557 emit_label (label);
20558 LABEL_NUSES (label) = 1;
20559 }
20560 if (max_size > 2)
20561 {
20562 rtx label = ix86_expand_aligntest (count, 2, true);
20563 dest = change_address (destmem, HImode, destptr);
20564 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20565 emit_label (label);
20566 LABEL_NUSES (label) = 1;
20567 }
20568 if (max_size > 1)
20569 {
20570 rtx label = ix86_expand_aligntest (count, 1, true);
20571 dest = change_address (destmem, QImode, destptr);
20572 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20573 emit_label (label);
20574 LABEL_NUSES (label) = 1;
20575 }
20576 }
20577
20578 /* Copy enough bytes from SRC to DEST to align DEST, which is known to be
20579 aligned to ALIGN, up to DESIRED_ALIGNMENT. */
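/* E.g. with ALIGN == 1 and DESIRED_ALIGNMENT == 8 this emits up to three
   conditional copies (1, 2 and 4 bytes), each guarded by a runtime test of
   the low bits of DESTPTR, and adjusts COUNT accordingly.  */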
20580 static void
20581 expand_movmem_prologue (rtx destmem, rtx srcmem,
20582 rtx destptr, rtx srcptr, rtx count,
20583 int align, int desired_alignment)
20584 {
20585 if (align <= 1 && desired_alignment > 1)
20586 {
20587 rtx label = ix86_expand_aligntest (destptr, 1, false);
20588 srcmem = change_address (srcmem, QImode, srcptr);
20589 destmem = change_address (destmem, QImode, destptr);
20590 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20591 ix86_adjust_counter (count, 1);
20592 emit_label (label);
20593 LABEL_NUSES (label) = 1;
20594 }
20595 if (align <= 2 && desired_alignment > 2)
20596 {
20597 rtx label = ix86_expand_aligntest (destptr, 2, false);
20598 srcmem = change_address (srcmem, HImode, srcptr);
20599 destmem = change_address (destmem, HImode, destptr);
20600 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20601 ix86_adjust_counter (count, 2);
20602 emit_label (label);
20603 LABEL_NUSES (label) = 1;
20604 }
20605 if (align <= 4 && desired_alignment > 4)
20606 {
20607 rtx label = ix86_expand_aligntest (destptr, 4, false);
20608 srcmem = change_address (srcmem, SImode, srcptr);
20609 destmem = change_address (destmem, SImode, destptr);
20610 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20611 ix86_adjust_counter (count, 4);
20612 emit_label (label);
20613 LABEL_NUSES (label) = 1;
20614 }
20615 gcc_assert (desired_alignment <= 8);
20616 }
20617
20618 /* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
20619 ALIGN_BYTES is how many bytes need to be copied. */
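/* For example, if ALIGN_BYTES == 3, the code below emits one 1-byte and one
   2-byte string move and then re-derives the alignment and size information
   of the remaining BLKmode references.  */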
20620 static rtx
20621 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20622 int desired_align, int align_bytes)
20623 {
20624 rtx src = *srcp;
20625 rtx src_size, dst_size;
20626 int off = 0;
20627 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20628 if (src_align_bytes >= 0)
20629 src_align_bytes = desired_align - src_align_bytes;
20630 src_size = MEM_SIZE (src);
20631 dst_size = MEM_SIZE (dst);
20632 if (align_bytes & 1)
20633 {
20634 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20635 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20636 off = 1;
20637 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20638 }
20639 if (align_bytes & 2)
20640 {
20641 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20642 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20643 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20644 set_mem_align (dst, 2 * BITS_PER_UNIT);
20645 if (src_align_bytes >= 0
20646 && (src_align_bytes & 1) == (align_bytes & 1)
20647 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20648 set_mem_align (src, 2 * BITS_PER_UNIT);
20649 off = 2;
20650 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20651 }
20652 if (align_bytes & 4)
20653 {
20654 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20655 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20656 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20657 set_mem_align (dst, 4 * BITS_PER_UNIT);
20658 if (src_align_bytes >= 0)
20659 {
20660 unsigned int src_align = 0;
20661 if ((src_align_bytes & 3) == (align_bytes & 3))
20662 src_align = 4;
20663 else if ((src_align_bytes & 1) == (align_bytes & 1))
20664 src_align = 2;
20665 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20666 set_mem_align (src, src_align * BITS_PER_UNIT);
20667 }
20668 off = 4;
20669 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20670 }
20671 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20672 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20673 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20674 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20675 if (src_align_bytes >= 0)
20676 {
20677 unsigned int src_align = 0;
20678 if ((src_align_bytes & 7) == (align_bytes & 7))
20679 src_align = 8;
20680 else if ((src_align_bytes & 3) == (align_bytes & 3))
20681 src_align = 4;
20682 else if ((src_align_bytes & 1) == (align_bytes & 1))
20683 src_align = 2;
20684 if (src_align > (unsigned int) desired_align)
20685 src_align = desired_align;
20686 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20687 set_mem_align (src, src_align * BITS_PER_UNIT);
20688 }
20689 if (dst_size)
20690 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20691 if (src_size)
20692 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
20693 *srcp = src;
20694 return dst;
20695 }
20696
20697 /* Store enough into DEST to align DEST, which is known to be aligned to
20698 ALIGN, up to DESIRED_ALIGNMENT. */
20699 static void
20700 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20701 int align, int desired_alignment)
20702 {
20703 if (align <= 1 && desired_alignment > 1)
20704 {
20705 rtx label = ix86_expand_aligntest (destptr, 1, false);
20706 destmem = change_address (destmem, QImode, destptr);
20707 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20708 ix86_adjust_counter (count, 1);
20709 emit_label (label);
20710 LABEL_NUSES (label) = 1;
20711 }
20712 if (align <= 2 && desired_alignment > 2)
20713 {
20714 rtx label = ix86_expand_aligntest (destptr, 2, false);
20715 destmem = change_address (destmem, HImode, destptr);
20716 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20717 ix86_adjust_counter (count, 2);
20718 emit_label (label);
20719 LABEL_NUSES (label) = 1;
20720 }
20721 if (align <= 4 && desired_alignment > 4)
20722 {
20723 rtx label = ix86_expand_aligntest (destptr, 4, false);
20724 destmem = change_address (destmem, SImode, destptr);
20725 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20726 ix86_adjust_counter (count, 4);
20727 emit_label (label);
20728 LABEL_NUSES (label) = 1;
20729 }
20730 gcc_assert (desired_alignment <= 8);
20731 }
20732
20733 /* Store enough into DST to align DST, which is known to be aligned to ALIGN,
20734 up to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
20735 static rtx
20736 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20737 int desired_align, int align_bytes)
20738 {
20739 int off = 0;
20740 rtx dst_size = MEM_SIZE (dst);
20741 if (align_bytes & 1)
20742 {
20743 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20744 off = 1;
20745 emit_insn (gen_strset (destreg, dst,
20746 gen_lowpart (QImode, value)));
20747 }
20748 if (align_bytes & 2)
20749 {
20750 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20751 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20752 set_mem_align (dst, 2 * BITS_PER_UNIT);
20753 off = 2;
20754 emit_insn (gen_strset (destreg, dst,
20755 gen_lowpart (HImode, value)));
20756 }
20757 if (align_bytes & 4)
20758 {
20759 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20760 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20761 set_mem_align (dst, 4 * BITS_PER_UNIT);
20762 off = 4;
20763 emit_insn (gen_strset (destreg, dst,
20764 gen_lowpart (SImode, value)));
20765 }
20766 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20767 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20768 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20769 if (dst_size)
20770 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20771 return dst;
20772 }
20773
20774 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20775 static enum stringop_alg
20776 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20777 int *dynamic_check)
20778 {
20779 const struct stringop_algs * algs;
20780 bool optimize_for_speed;
20781 /* Algorithms using the rep prefix want at least edi and ecx;
20782 additionally, memset wants eax and memcpy wants esi. Don't
20783 consider such algorithms if the user has appropriated those
20784 registers for their own purposes. */
20785 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20786 || (memset
20787 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
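  /* For example (a guess at typical usage, not an exhaustive list), the user
     may have taken a register away with -ffixed-ecx or a global register
     variable, in which case the rep-prefixed variants cannot be used.  */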
20788
20789 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20790 || (alg != rep_prefix_1_byte \
20791 && alg != rep_prefix_4_byte \
20792 && alg != rep_prefix_8_byte))
20793 const struct processor_costs *cost;
20794
20795 /* Even if the string operation call is cold, we still might spend a lot
20796 of time processing large blocks. */
20797 if (optimize_function_for_size_p (cfun)
20798 || (optimize_insn_for_size_p ()
20799 && expected_size != -1 && expected_size < 256))
20800 optimize_for_speed = false;
20801 else
20802 optimize_for_speed = true;
20803
20804 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20805
20806 *dynamic_check = -1;
20807 if (memset)
20808 algs = &cost->memset[TARGET_64BIT != 0];
20809 else
20810 algs = &cost->memcpy[TARGET_64BIT != 0];
20811 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
20812 return stringop_alg;
20813 /* rep; movq or rep; movl is the smallest variant. */
20814 else if (!optimize_for_speed)
20815 {
20816 if (!count || (count & 3))
20817 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20818 else
20819 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20820 }
20821 /* Very tiny blocks are best handled via the loop; REP is expensive to set
20822 up. */
20823 else if (expected_size != -1 && expected_size < 4)
20824 return loop_1_byte;
20825 else if (expected_size != -1)
20826 {
20827 unsigned int i;
20828 enum stringop_alg alg = libcall;
20829 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20830 {
20831 /* We get here if the algorithms that were not libcall-based
20832 were rep-prefix based and we are unable to use rep prefixes
20833 based on global register usage. Break out of the loop and
20834 use the heuristic below. */
20835 if (algs->size[i].max == 0)
20836 break;
20837 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20838 {
20839 enum stringop_alg candidate = algs->size[i].alg;
20840
20841 if (candidate != libcall && ALG_USABLE_P (candidate))
20842 alg = candidate;
20843 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20844 last non-libcall inline algorithm. */
20845 if (TARGET_INLINE_ALL_STRINGOPS)
20846 {
20847 /* When the current size is best copied by a libcall, but we
20848 are still forced to inline, run the heuristic below that
20849 picks code for medium-sized blocks. */
20850 if (alg != libcall)
20851 return alg;
20852 break;
20853 }
20854 else if (ALG_USABLE_P (candidate))
20855 return candidate;
20856 }
20857 }
20858 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20859 }
20860 /* When asked to inline the call anyway, try to pick a meaningful choice.
20861 We look for the maximal size of block that is faster to copy by hand,
20862 and take blocks of at most that size, guessing that the average size
20863 will be roughly half of the maximum.
20864
20865 If this turns out to be bad, we might simply specify the preferred
20866 choice in ix86_costs. */
20867 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20868 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20869 {
20870 int max = -1;
20871 enum stringop_alg alg;
20872 int i;
20873 bool any_alg_usable_p = true;
20874
20875 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20876 {
20877 enum stringop_alg candidate = algs->size[i].alg;
20878 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20879
20880 if (candidate != libcall && candidate
20881 && ALG_USABLE_P (candidate))
20882 max = algs->size[i].max;
20883 }
20884 /* If there aren't any usable algorithms, then recursing on
20885 smaller sizes isn't going to find anything. Just return the
20886 simple byte-at-a-time copy loop. */
20887 if (!any_alg_usable_p)
20888 {
20889 /* Pick something reasonable. */
20890 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20891 *dynamic_check = 128;
20892 return loop_1_byte;
20893 }
20894 if (max == -1)
20895 max = 4096;
20896 alg = decide_alg (count, max / 2, memset, dynamic_check);
20897 gcc_assert (*dynamic_check == -1);
20898 gcc_assert (alg != libcall);
20899 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20900 *dynamic_check = max;
20901 return alg;
20902 }
20903 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20904 #undef ALG_USABLE_P
20905 }
20906
20907 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20908 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20909 static int
20910 decide_alignment (int align,
20911 enum stringop_alg alg,
20912 int expected_size)
20913 {
20914 int desired_align = 0;
20915 switch (alg)
20916 {
20917 case no_stringop:
20918 gcc_unreachable ();
20919 case loop:
20920 case unrolled_loop:
20921 desired_align = GET_MODE_SIZE (Pmode);
20922 break;
20923 case rep_prefix_8_byte:
20924 desired_align = 8;
20925 break;
20926 case rep_prefix_4_byte:
20927 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20928 copying a whole cache line at once. */
20929 if (TARGET_PENTIUMPRO)
20930 desired_align = 8;
20931 else
20932 desired_align = 4;
20933 break;
20934 case rep_prefix_1_byte:
20935 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20936 copying a whole cache line at once. */
20937 if (TARGET_PENTIUMPRO)
20938 desired_align = 8;
20939 else
20940 desired_align = 1;
20941 break;
20942 case loop_1_byte:
20943 desired_align = 1;
20944 break;
20945 case libcall:
20946 return 0;
20947 }
20948
20949 if (optimize_size)
20950 desired_align = 1;
20951 if (desired_align < align)
20952 desired_align = align;
20953 if (expected_size != -1 && expected_size < 4)
20954 desired_align = align;
20955 return desired_align;
20956 }
20957
20958 /* Return the smallest power of 2 greater than VAL. */
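/* E.g. smallest_pow2_greater_than (4) == 8 and smallest_pow2_greater_than (0)
   == 1; note the result is strictly greater than VAL.  */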
20959 static int
20960 smallest_pow2_greater_than (int val)
20961 {
20962 int ret = 1;
20963 while (ret <= val)
20964 ret <<= 1;
20965 return ret;
20966 }
20967
20968 /* Expand string move (memcpy) operation. Use i386 string operations
20969 when profitable. expand_setmem contains similar code. The code
20970 depends upon architecture, block size and alignment, but always has
20971 the same overall structure:
20972
20973 1) Prologue guard: a conditional that jumps to the epilogue for small
20974 blocks that can be handled by the epilogue alone. This is faster
20975 but also needed for correctness, since the prologue assumes the block
20976 is larger than the desired alignment.
20977
20978 An optional dynamic check for size and a libcall for large
20979 blocks are emitted here too, with -minline-stringops-dynamically.
20980
20981 2) Prologue: copy the first few bytes in order to get the destination
20982 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
20983 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
20984 copied. We emit either a jump tree on power-of-two-sized
20985 blocks, or a byte loop.
20986
20987 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20988 with the specified algorithm.
20989
20990 4) Epilogue: code copying the tail of the block that is too small to be
20991 handled by the main body (or up to the size guarded by the prologue guard). */
20992
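/* As a rough illustration only (actual choices depend on the active cost
   tables): with the unrolled_loop algorithm on a 64-bit target and an
   unknown count, step 1 branches to the epilogue for counts below 32,
   step 2 copies up to 7 bytes to reach 8-byte destination alignment,
   step 3 moves 32 bytes per iteration, and step 4 handles the remaining
   count & 31 bytes.  */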
20993 bool
20994 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20995 rtx expected_align_exp, rtx expected_size_exp)
20996 {
20997 rtx destreg;
20998 rtx srcreg;
20999 rtx label = NULL;
21000 rtx tmp;
21001 rtx jump_around_label = NULL;
21002 HOST_WIDE_INT align = 1;
21003 unsigned HOST_WIDE_INT count = 0;
21004 HOST_WIDE_INT expected_size = -1;
21005 int size_needed = 0, epilogue_size_needed;
21006 int desired_align = 0, align_bytes = 0;
21007 enum stringop_alg alg;
21008 int dynamic_check;
21009 bool need_zero_guard = false;
21010
21011 if (CONST_INT_P (align_exp))
21012 align = INTVAL (align_exp);
21013 /* i386 can do misaligned access at a reasonably increased cost. */
21014 if (CONST_INT_P (expected_align_exp)
21015 && INTVAL (expected_align_exp) > align)
21016 align = INTVAL (expected_align_exp);
21017 /* ALIGN is the minimum of destination and source alignment, but we care here
21018 just about destination alignment. */
21019 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
21020 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
21021
21022 if (CONST_INT_P (count_exp))
21023 count = expected_size = INTVAL (count_exp);
21024 if (CONST_INT_P (expected_size_exp) && count == 0)
21025 expected_size = INTVAL (expected_size_exp);
21026
21027 /* Make sure we don't need to care about overflow later on. */
21028 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21029 return false;
21030
21031 /* Step 0: Decide on preferred algorithm, desired alignment and
21032 size of chunks to be copied by main loop. */
21033
21034 alg = decide_alg (count, expected_size, false, &dynamic_check);
21035 desired_align = decide_alignment (align, alg, expected_size);
21036
21037 if (!TARGET_ALIGN_STRINGOPS)
21038 align = desired_align;
21039
21040 if (alg == libcall)
21041 return false;
21042 gcc_assert (alg != no_stringop);
21043 if (!count)
21044 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
21045 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21046 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
21047 switch (alg)
21048 {
21049 case libcall:
21050 case no_stringop:
21051 gcc_unreachable ();
21052 case loop:
21053 need_zero_guard = true;
21054 size_needed = GET_MODE_SIZE (Pmode);
21055 break;
21056 case unrolled_loop:
21057 need_zero_guard = true;
21058 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
21059 break;
21060 case rep_prefix_8_byte:
21061 size_needed = 8;
21062 break;
21063 case rep_prefix_4_byte:
21064 size_needed = 4;
21065 break;
21066 case rep_prefix_1_byte:
21067 size_needed = 1;
21068 break;
21069 case loop_1_byte:
21070 need_zero_guard = true;
21071 size_needed = 1;
21072 break;
21073 }
21074
21075 epilogue_size_needed = size_needed;
21076
21077 /* Step 1: Prologue guard. */
21078
21079 /* Alignment code needs count to be in register. */
21080 if (CONST_INT_P (count_exp) && desired_align > align)
21081 {
21082 if (INTVAL (count_exp) > desired_align
21083 && INTVAL (count_exp) > size_needed)
21084 {
21085 align_bytes
21086 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21087 if (align_bytes <= 0)
21088 align_bytes = 0;
21089 else
21090 align_bytes = desired_align - align_bytes;
21091 }
21092 if (align_bytes == 0)
21093 count_exp = force_reg (counter_mode (count_exp), count_exp);
21094 }
21095 gcc_assert (desired_align >= 1 && align >= 1);
21096
21097 /* Ensure that alignment prologue won't copy past end of block. */
21098 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21099 {
21100 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21101 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21102 Make sure it is a power of 2. */
21103 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21104
21105 if (count)
21106 {
21107 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21108 {
21109 /* If main algorithm works on QImode, no epilogue is needed.
21110 For small sizes just don't align anything. */
21111 if (size_needed == 1)
21112 desired_align = align;
21113 else
21114 goto epilogue;
21115 }
21116 }
21117 else
21118 {
21119 label = gen_label_rtx ();
21120 emit_cmp_and_jump_insns (count_exp,
21121 GEN_INT (epilogue_size_needed),
21122 LTU, 0, counter_mode (count_exp), 1, label);
21123 if (expected_size == -1 || expected_size < epilogue_size_needed)
21124 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21125 else
21126 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21127 }
21128 }
21129
21130 /* Emit code to decide on runtime whether library call or inline should be
21131 used. */
21132 if (dynamic_check != -1)
21133 {
21134 if (CONST_INT_P (count_exp))
21135 {
21136 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
21137 {
21138 emit_block_move_via_libcall (dst, src, count_exp, false);
21139 count_exp = const0_rtx;
21140 goto epilogue;
21141 }
21142 }
21143 else
21144 {
21145 rtx hot_label = gen_label_rtx ();
21146 jump_around_label = gen_label_rtx ();
21147 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21148 LEU, 0, GET_MODE (count_exp), 1, hot_label);
21149 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21150 emit_block_move_via_libcall (dst, src, count_exp, false);
21151 emit_jump (jump_around_label);
21152 emit_label (hot_label);
21153 }
21154 }
21155
21156 /* Step 2: Alignment prologue. */
21157
21158 if (desired_align > align)
21159 {
21160 if (align_bytes == 0)
21161 {
21162 /* Except for the first move in the epilogue, we no longer know
21163 the constant offset in the aliasing info. It doesn't seem worth
21164 the pain to maintain it for the first move, so throw away
21165 the info early. */
21166 src = change_address (src, BLKmode, srcreg);
21167 dst = change_address (dst, BLKmode, destreg);
21168 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
21169 desired_align);
21170 }
21171 else
21172 {
21173 /* If we know how many bytes need to be stored before dst is
21174 sufficiently aligned, maintain aliasing info accurately. */
21175 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
21176 desired_align, align_bytes);
21177 count_exp = plus_constant (count_exp, -align_bytes);
21178 count -= align_bytes;
21179 }
21180 if (need_zero_guard
21181 && (count < (unsigned HOST_WIDE_INT) size_needed
21182 || (align_bytes == 0
21183 && count < ((unsigned HOST_WIDE_INT) size_needed
21184 + desired_align - align))))
21185 {
21186 /* It is possible that we copied enough so the main loop will not
21187 execute. */
21188 gcc_assert (size_needed > 1);
21189 if (label == NULL_RTX)
21190 label = gen_label_rtx ();
21191 emit_cmp_and_jump_insns (count_exp,
21192 GEN_INT (size_needed),
21193 LTU, 0, counter_mode (count_exp), 1, label);
21194 if (expected_size == -1
21195 || expected_size < (desired_align - align) / 2 + size_needed)
21196 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21197 else
21198 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21199 }
21200 }
21201 if (label && size_needed == 1)
21202 {
21203 emit_label (label);
21204 LABEL_NUSES (label) = 1;
21205 label = NULL;
21206 epilogue_size_needed = 1;
21207 }
21208 else if (label == NULL_RTX)
21209 epilogue_size_needed = size_needed;
21210
21211 /* Step 3: Main loop. */
21212
21213 switch (alg)
21214 {
21215 case libcall:
21216 case no_stringop:
21217 gcc_unreachable ();
21218 case loop_1_byte:
21219 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21220 count_exp, QImode, 1, expected_size);
21221 break;
21222 case loop:
21223 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21224 count_exp, Pmode, 1, expected_size);
21225 break;
21226 case unrolled_loop:
21227 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
21228 registers for 4 temporaries anyway. */
21229 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21230 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
21231 expected_size);
21232 break;
21233 case rep_prefix_8_byte:
21234 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21235 DImode);
21236 break;
21237 case rep_prefix_4_byte:
21238 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21239 SImode);
21240 break;
21241 case rep_prefix_1_byte:
21242 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21243 QImode);
21244 break;
21245 }
21246 /* Properly adjust the offsets of the src and dest memory for aliasing. */
21247 if (CONST_INT_P (count_exp))
21248 {
21249 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
21250 (count / size_needed) * size_needed);
21251 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21252 (count / size_needed) * size_needed);
21253 }
21254 else
21255 {
21256 src = change_address (src, BLKmode, srcreg);
21257 dst = change_address (dst, BLKmode, destreg);
21258 }
21259
21260 /* Step 4: Epilogue to copy the remaining bytes. */
21261 epilogue:
21262 if (label)
21263 {
21264 /* When the main loop is done, COUNT_EXP might hold the original count,
21265 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21266 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21267 bytes. Compensate if needed. */
21268
21269 if (size_needed < epilogue_size_needed)
21270 {
21271 tmp =
21272 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21273 GEN_INT (size_needed - 1), count_exp, 1,
21274 OPTAB_DIRECT);
21275 if (tmp != count_exp)
21276 emit_move_insn (count_exp, tmp);
21277 }
21278 emit_label (label);
21279 LABEL_NUSES (label) = 1;
21280 }
21281
21282 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21283 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
21284 epilogue_size_needed);
21285 if (jump_around_label)
21286 emit_label (jump_around_label);
21287 return true;
21288 }
21289
21290 /* Helper function for memset. For a QImode value 0xXY produce
21291 0xXYXYXYXY of the width specified by MODE. This is essentially
21292 a * 0x10101010, but we can do slightly better than
21293 synth_mult by unwinding the sequence by hand on CPUs with
21294 slow multiply. */
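/* E.g. promoting the QImode value 0x5A gives 0x5A5A5A5A in SImode and
   0x5A5A5A5A5A5A5A5A in DImode.  */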
21295 static rtx
21296 promote_duplicated_reg (enum machine_mode mode, rtx val)
21297 {
21298 enum machine_mode valmode = GET_MODE (val);
21299 rtx tmp;
21300 int nops = mode == DImode ? 3 : 2;
21301
21302 gcc_assert (mode == SImode || mode == DImode);
21303 if (val == const0_rtx)
21304 return copy_to_mode_reg (mode, const0_rtx);
21305 if (CONST_INT_P (val))
21306 {
21307 HOST_WIDE_INT v = INTVAL (val) & 255;
21308
21309 v |= v << 8;
21310 v |= v << 16;
21311 if (mode == DImode)
21312 v |= (v << 16) << 16;
21313 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
21314 }
21315
21316 if (valmode == VOIDmode)
21317 valmode = QImode;
21318 if (valmode != QImode)
21319 val = gen_lowpart (QImode, val);
21320 if (mode == QImode)
21321 return val;
21322 if (!TARGET_PARTIAL_REG_STALL)
21323 nops--;
21324 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21325 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21326 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21327 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21328 {
21329 rtx reg = convert_modes (mode, QImode, val, true);
21330 tmp = promote_duplicated_reg (mode, const1_rtx);
21331 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21332 OPTAB_DIRECT);
21333 }
21334 else
21335 {
21336 rtx reg = convert_modes (mode, QImode, val, true);
21337
21338 if (!TARGET_PARTIAL_REG_STALL)
21339 if (mode == SImode)
21340 emit_insn (gen_movsi_insv_1 (reg, reg));
21341 else
21342 emit_insn (gen_movdi_insv_1 (reg, reg));
21343 else
21344 {
21345 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21346 NULL, 1, OPTAB_DIRECT);
21347 reg =
21348 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21349 }
21350 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21351 NULL, 1, OPTAB_DIRECT);
21352 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21353 if (mode == SImode)
21354 return reg;
21355 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21356 NULL, 1, OPTAB_DIRECT);
21357 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21358 return reg;
21359 }
21360 }
21361
21362 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
21363 will be needed by the main loop copying SIZE_NEEDED chunks and by the
21364 prologue getting the alignment from ALIGN to DESIRED_ALIGN. */
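/* For instance, a 64-bit memset whose main loop stores DImode chunks
   (SIZE_NEEDED == 8) gets the value duplicated into a DImode register,
   while a byte loop (SIZE_NEEDED == 1) with no extra alignment work keeps
   VAL unchanged.  */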
21365 static rtx
21366 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21367 {
21368 rtx promoted_val;
21369
21370 if (TARGET_64BIT
21371 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21372 promoted_val = promote_duplicated_reg (DImode, val);
21373 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21374 promoted_val = promote_duplicated_reg (SImode, val);
21375 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21376 promoted_val = promote_duplicated_reg (HImode, val);
21377 else
21378 promoted_val = val;
21379
21380 return promoted_val;
21381 }
21382
21383 /* Expand string set operation (memset). Use i386 string operations when
21384 profitable. See the expand_movmem comment for an explanation of the
21385 individual steps performed. */
21386 bool
21387 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21388 rtx expected_align_exp, rtx expected_size_exp)
21389 {
21390 rtx destreg;
21391 rtx label = NULL;
21392 rtx tmp;
21393 rtx jump_around_label = NULL;
21394 HOST_WIDE_INT align = 1;
21395 unsigned HOST_WIDE_INT count = 0;
21396 HOST_WIDE_INT expected_size = -1;
21397 int size_needed = 0, epilogue_size_needed;
21398 int desired_align = 0, align_bytes = 0;
21399 enum stringop_alg alg;
21400 rtx promoted_val = NULL;
21401 bool force_loopy_epilogue = false;
21402 int dynamic_check;
21403 bool need_zero_guard = false;
21404
21405 if (CONST_INT_P (align_exp))
21406 align = INTVAL (align_exp);
21407 /* i386 can do misaligned access at a reasonably increased cost. */
21408 if (CONST_INT_P (expected_align_exp)
21409 && INTVAL (expected_align_exp) > align)
21410 align = INTVAL (expected_align_exp);
21411 if (CONST_INT_P (count_exp))
21412 count = expected_size = INTVAL (count_exp);
21413 if (CONST_INT_P (expected_size_exp) && count == 0)
21414 expected_size = INTVAL (expected_size_exp);
21415
21416 /* Make sure we don't need to care about overflow later on. */
21417 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21418 return false;
21419
21420 /* Step 0: Decide on preferred algorithm, desired alignment and
21421 size of chunks to be copied by main loop. */
21422
21423 alg = decide_alg (count, expected_size, true, &dynamic_check);
21424 desired_align = decide_alignment (align, alg, expected_size);
21425
21426 if (!TARGET_ALIGN_STRINGOPS)
21427 align = desired_align;
21428
21429 if (alg == libcall)
21430 return false;
21431 gcc_assert (alg != no_stringop);
21432 if (!count)
21433 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21434 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21435 switch (alg)
21436 {
21437 case libcall:
21438 case no_stringop:
21439 gcc_unreachable ();
21440 case loop:
21441 need_zero_guard = true;
21442 size_needed = GET_MODE_SIZE (Pmode);
21443 break;
21444 case unrolled_loop:
21445 need_zero_guard = true;
21446 size_needed = GET_MODE_SIZE (Pmode) * 4;
21447 break;
21448 case rep_prefix_8_byte:
21449 size_needed = 8;
21450 break;
21451 case rep_prefix_4_byte:
21452 size_needed = 4;
21453 break;
21454 case rep_prefix_1_byte:
21455 size_needed = 1;
21456 break;
21457 case loop_1_byte:
21458 need_zero_guard = true;
21459 size_needed = 1;
21460 break;
21461 }
21462 epilogue_size_needed = size_needed;
21463
21464 /* Step 1: Prologue guard. */
21465
21466 /* Alignment code needs count to be in register. */
21467 if (CONST_INT_P (count_exp) && desired_align > align)
21468 {
21469 if (INTVAL (count_exp) > desired_align
21470 && INTVAL (count_exp) > size_needed)
21471 {
21472 align_bytes
21473 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21474 if (align_bytes <= 0)
21475 align_bytes = 0;
21476 else
21477 align_bytes = desired_align - align_bytes;
21478 }
21479 if (align_bytes == 0)
21480 {
21481 enum machine_mode mode = SImode;
21482 if (TARGET_64BIT && (count & ~0xffffffff))
21483 mode = DImode;
21484 count_exp = force_reg (mode, count_exp);
21485 }
21486 }
21487 /* Do the cheap promotion to allow better CSE across the
21488 main loop and epilogue (i.e. one load of the big constant in
21489 front of all the code). */
21490 if (CONST_INT_P (val_exp))
21491 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21492 desired_align, align);
21493 /* Ensure that alignment prologue won't copy past end of block. */
21494 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21495 {
21496 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21497 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21498 Make sure it is a power of 2. */
21499 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21500
21501 /* To improve performance of small blocks, we jump around the VAL
21502 promoting code. This means that if the promoted VAL is not constant,
21503 we might not use it in the epilogue and have to use the byte
21504 loop variant. */
21505 if (epilogue_size_needed > 2 && !promoted_val)
21506 force_loopy_epilogue = true;
21507 if (count)
21508 {
21509 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21510 {
21511 /* If main algorithm works on QImode, no epilogue is needed.
21512 For small sizes just don't align anything. */
21513 if (size_needed == 1)
21514 desired_align = align;
21515 else
21516 goto epilogue;
21517 }
21518 }
21519 else
21520 {
21521 label = gen_label_rtx ();
21522 emit_cmp_and_jump_insns (count_exp,
21523 GEN_INT (epilogue_size_needed),
21524 LTU, 0, counter_mode (count_exp), 1, label);
21525 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21526 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21527 else
21528 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21529 }
21530 }
21531 if (dynamic_check != -1)
21532 {
21533 rtx hot_label = gen_label_rtx ();
21534 jump_around_label = gen_label_rtx ();
21535 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21536 LEU, 0, counter_mode (count_exp), 1, hot_label);
21537 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21538 set_storage_via_libcall (dst, count_exp, val_exp, false);
21539 emit_jump (jump_around_label);
21540 emit_label (hot_label);
21541 }
21542
21543 /* Step 2: Alignment prologue. */
21544
21545 /* Do the expensive promotion once we have branched around the small blocks. */
21546 if (!promoted_val)
21547 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21548 desired_align, align);
21549 gcc_assert (desired_align >= 1 && align >= 1);
21550
21551 if (desired_align > align)
21552 {
21553 if (align_bytes == 0)
21554 {
21555 /* Except for the first move in the epilogue, we no longer know
21556 the constant offset in the aliasing info. It doesn't seem worth
21557 the pain to maintain it for the first move, so throw away
21558 the info early. */
21559 dst = change_address (dst, BLKmode, destreg);
21560 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21561 desired_align);
21562 }
21563 else
21564 {
21565 /* If we know how many bytes need to be stored before dst is
21566 sufficiently aligned, maintain aliasing info accurately. */
21567 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21568 desired_align, align_bytes);
21569 count_exp = plus_constant (count_exp, -align_bytes);
21570 count -= align_bytes;
21571 }
21572 if (need_zero_guard
21573 && (count < (unsigned HOST_WIDE_INT) size_needed
21574 || (align_bytes == 0
21575 && count < ((unsigned HOST_WIDE_INT) size_needed
21576 + desired_align - align))))
21577 {
21578 /* It is possible that we copied enough so the main loop will not
21579 execute. */
21580 gcc_assert (size_needed > 1);
21581 if (label == NULL_RTX)
21582 label = gen_label_rtx ();
21583 emit_cmp_and_jump_insns (count_exp,
21584 GEN_INT (size_needed),
21585 LTU, 0, counter_mode (count_exp), 1, label);
21586 if (expected_size == -1
21587 || expected_size < (desired_align - align) / 2 + size_needed)
21588 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21589 else
21590 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21591 }
21592 }
21593 if (label && size_needed == 1)
21594 {
21595 emit_label (label);
21596 LABEL_NUSES (label) = 1;
21597 label = NULL;
21598 promoted_val = val_exp;
21599 epilogue_size_needed = 1;
21600 }
21601 else if (label == NULL_RTX)
21602 epilogue_size_needed = size_needed;
21603
21604 /* Step 3: Main loop. */
21605
21606 switch (alg)
21607 {
21608 case libcall:
21609 case no_stringop:
21610 gcc_unreachable ();
21611 case loop_1_byte:
21612 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21613 count_exp, QImode, 1, expected_size);
21614 break;
21615 case loop:
21616 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21617 count_exp, Pmode, 1, expected_size);
21618 break;
21619 case unrolled_loop:
21620 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21621 count_exp, Pmode, 4, expected_size);
21622 break;
21623 case rep_prefix_8_byte:
21624 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21625 DImode, val_exp);
21626 break;
21627 case rep_prefix_4_byte:
21628 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21629 SImode, val_exp);
21630 break;
21631 case rep_prefix_1_byte:
21632 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21633 QImode, val_exp);
21634 break;
21635 }
21636 /* Properly adjust the offset of the dest memory for aliasing. */
21637 if (CONST_INT_P (count_exp))
21638 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21639 (count / size_needed) * size_needed);
21640 else
21641 dst = change_address (dst, BLKmode, destreg);
21642
21643 /* Step 4: Epilogue to copy the remaining bytes. */
21644
21645 if (label)
21646 {
21647 /* When the main loop is done, COUNT_EXP might hold the original count,
21648 while we want to store only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21649 The epilogue code will actually store COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21650 bytes. Compensate if needed. */
21651
21652 if (size_needed < epilogue_size_needed)
21653 {
21654 tmp =
21655 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21656 GEN_INT (size_needed - 1), count_exp, 1,
21657 OPTAB_DIRECT);
21658 if (tmp != count_exp)
21659 emit_move_insn (count_exp, tmp);
21660 }
21661 emit_label (label);
21662 LABEL_NUSES (label) = 1;
21663 }
21664 epilogue:
21665 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21666 {
21667 if (force_loopy_epilogue)
21668 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21669 epilogue_size_needed);
21670 else
21671 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21672 epilogue_size_needed);
21673 }
21674 if (jump_around_label)
21675 emit_label (jump_around_label);
21676 return true;
21677 }
21678
21679 /* Expand the appropriate insns for doing strlen if not just doing
21680 repnz; scasb
21681
21682 out = result, initialized with the start address
21683 align_rtx = alignment of the address.
21684 scratch = scratch register, initialized with the start address when
21685 not aligned, otherwise undefined
21686
21687 This is just the body. It needs the initializations mentioned above and
21688 some address computing at the end. These things are done in i386.md. */
21689
21690 static void
21691 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21692 {
21693 int align;
21694 rtx tmp;
21695 rtx align_2_label = NULL_RTX;
21696 rtx align_3_label = NULL_RTX;
21697 rtx align_4_label = gen_label_rtx ();
21698 rtx end_0_label = gen_label_rtx ();
21699 rtx mem;
21700 rtx tmpreg = gen_reg_rtx (SImode);
21701 rtx scratch = gen_reg_rtx (SImode);
21702 rtx cmp;
21703
21704 align = 0;
21705 if (CONST_INT_P (align_rtx))
21706 align = INTVAL (align_rtx);
21707
21708 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21709
21710 /* Is there a known alignment and is it less than 4? */
21711 if (align < 4)
21712 {
21713 rtx scratch1 = gen_reg_rtx (Pmode);
21714 emit_move_insn (scratch1, out);
21715 /* Is there a known alignment and is it not 2? */
21716 if (align != 2)
21717 {
21718 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21719 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21720
21721 /* Leave just the two lower bits. */
21722 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21723 NULL_RTX, 0, OPTAB_WIDEN);
21724
21725 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21726 Pmode, 1, align_4_label);
21727 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21728 Pmode, 1, align_2_label);
21729 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21730 Pmode, 1, align_3_label);
21731 }
21732 else
21733 {
21734 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21735 check whether it is aligned to a 4-byte boundary. */
21736
21737 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21738 NULL_RTX, 0, OPTAB_WIDEN);
21739
21740 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21741 Pmode, 1, align_4_label);
21742 }
21743
21744 mem = change_address (src, QImode, out);
21745
21746 /* Now compare the bytes. */
21747
21748 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
21749 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21750 QImode, 1, end_0_label);
21751
21752 /* Increment the address. */
21753 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21754
21755 /* Not needed with an alignment of 2 */
21756 if (align != 2)
21757 {
21758 emit_label (align_2_label);
21759
21760 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21761 end_0_label);
21762
21763 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21764
21765 emit_label (align_3_label);
21766 }
21767
21768 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21769 end_0_label);
21770
21771 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21772 }
21773
21774 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
21775 align this loop: it only makes programs huge without providing any
21776 speedup. */
21777 emit_label (align_4_label);
21778
21779 mem = change_address (src, SImode, out);
21780 emit_move_insn (scratch, mem);
21781 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21782
21783 /* This formula yields a nonzero result iff one of the bytes is zero.
21784 This saves three branches inside the loop and many cycles. */
21785
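  /* Worked example: for scratch == 0x11002233, scratch - 0x01010101 is
     0x0fff2132, ~scratch is 0xeeffddcc, their AND is 0x0eff0100, and
     masking with 0x80808080 leaves 0x00800000 -- exactly the high bit of
     the zero byte.  */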
21786 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21787 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21788 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21789 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21790 gen_int_mode (0x80808080, SImode)));
21791 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21792 align_4_label);
21793
21794 if (TARGET_CMOVE)
21795 {
21796 rtx reg = gen_reg_rtx (SImode);
21797 rtx reg2 = gen_reg_rtx (Pmode);
21798 emit_move_insn (reg, tmpreg);
21799 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21800
21801 /* If zero is not in the first two bytes, move two bytes forward. */
21802 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21803 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21804 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21805 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21806 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21807 reg,
21808 tmpreg)));
21809 /* Emit lea manually to avoid clobbering of flags. */
21810 emit_insn (gen_rtx_SET (SImode, reg2,
21811 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21812
21813 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21814 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21815 emit_insn (gen_rtx_SET (VOIDmode, out,
21816 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21817 reg2,
21818 out)));
21819 }
21820 else
21821 {
21822 rtx end_2_label = gen_label_rtx ();
21823 /* Is zero in the first two bytes? */
21824
21825 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21826 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21827 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21828 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21829 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21830 pc_rtx);
21831 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21832 JUMP_LABEL (tmp) = end_2_label;
21833
21834 /* Not in the first two. Move two bytes forward. */
21835 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21836 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21837
21838 emit_label (end_2_label);
21839
21840 }
21841
21842 /* Avoid branch in fixing the byte. */
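   /* How the fixup below works (illustrative): when the loop exits, OUT
      points 4 bytes past the word that contains the zero, or 6 bytes past
      it if the zero was found in the upper half and OUT was advanced by 2
      above.  Doubling TMPREG moves the 0x80 flag of the first byte of the
      selected pair into the carry flag, so subtracting 3 plus the carry
      lands OUT exactly on the zero byte:
	 zero at byte 0: OUT = base+4, carry = 1 -> base+0
	 zero at byte 1: OUT = base+4, carry = 0 -> base+1
	 zero at byte 2: OUT = base+6, carry = 1 -> base+2
	 zero at byte 3: OUT = base+6, carry = 0 -> base+3  */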
21843 tmpreg = gen_lowpart (QImode, tmpreg);
21844 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21845 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21846 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21847 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21848
21849 emit_label (end_0_label);
21850 }
21851
21852 /* Expand strlen. */
21853
21854 bool
21855 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21856 {
21857 rtx addr, scratch1, scratch2, scratch3, scratch4;
21858
21859   /* The generic case of the strlen expander is long.  Avoid expanding it
21860      unless TARGET_INLINE_ALL_STRINGOPS.  */
21861
21862 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21863 && !TARGET_INLINE_ALL_STRINGOPS
21864 && !optimize_insn_for_size_p ()
21865 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21866 return false;
21867
21868 addr = force_reg (Pmode, XEXP (src, 0));
21869 scratch1 = gen_reg_rtx (Pmode);
21870
21871 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21872 && !optimize_insn_for_size_p ())
21873 {
21874       /* It seems that some optimizers do not combine a call like
21875 	 foo (strlen (bar), strlen (bar));
21876 	 when the move and the subtraction are done here; the length is
21877 	 computed just once when these instructions are emitted inside
21878 	 output_strlen_unroll().  But since &bar[strlen(bar)] is often
21879 	 used and this uses one fewer register for the lifetime of
21880 	 output_strlen_unroll(), this is better.  */
21881
21882 emit_move_insn (out, addr);
21883
21884 ix86_expand_strlensi_unroll_1 (out, src, align);
21885
21886 /* strlensi_unroll_1 returns the address of the zero at the end of
21887 the string, like memchr(), so compute the length by subtracting
21888 the start address. */
21889 emit_insn (ix86_gen_sub3 (out, out, addr));
21890 }
21891 else
21892 {
21893 rtx unspec;
21894
21895 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21896 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21897 return false;
21898
21899 scratch2 = gen_reg_rtx (Pmode);
21900 scratch3 = gen_reg_rtx (Pmode);
21901 scratch4 = force_reg (Pmode, constm1_rtx);
21902
21903 emit_move_insn (scratch3, addr);
21904 eoschar = force_reg (QImode, eoschar);
21905
21906 src = replace_equiv_address_nv (src, scratch3);
21907
21908 /* If .md starts supporting :P, this can be done in .md. */
21909 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21910 scratch4), UNSPEC_SCAS);
21911 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21912 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21913 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
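      /* Sanity check of the arithmetic above (illustrative only), assuming
	 the strlenqi_1 pattern expands to repnz scasb with the count
	 register starting at -1 (scratch4): scanning a string of length n
	 examines n + 1 bytes including the terminator, leaving the count
	 at -(n + 2); its one's complement is n + 1, and adding -1 gives n,
	 the length without the terminating zero.  */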
21914 }
21915 return true;
21916 }
21917
21918 /* For a given symbol (function), construct code to compute the address of
21919    its PLT entry in the large x86-64 PIC model.  */
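/* Roughly, the emitted sequence is (illustrative; the register holding the
   GOT base is whatever pic_offset_table_rtx ends up being allocated to):
	movabs $symbol@PLTOFF, %tmp
	add    %gotbase, %tmp
   so TMP ends up holding the absolute address of SYMBOL's PLT entry.  */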
21920 rtx
21921 construct_plt_address (rtx symbol)
21922 {
21923 rtx tmp = gen_reg_rtx (Pmode);
21924 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21925
21926 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21927 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21928
21929 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21930 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21931 return tmp;
21932 }
21933
21934 rtx
21935 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21936 rtx callarg2,
21937 rtx pop, int sibcall)
21938 {
21939 rtx use = NULL, call;
21940
21941 if (pop == const0_rtx)
21942 pop = NULL;
21943 gcc_assert (!TARGET_64BIT || !pop);
21944
21945 if (TARGET_MACHO && !TARGET_64BIT)
21946 {
21947 #if TARGET_MACHO
21948 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21949 fnaddr = machopic_indirect_call_target (fnaddr);
21950 #endif
21951 }
21952 else
21953 {
21954 /* Static functions and indirect calls don't need the pic register. */
21955 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21956 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21957 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21958 use_reg (&use, pic_offset_table_rtx);
21959 }
21960
21961 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21962 {
21963 rtx al = gen_rtx_REG (QImode, AX_REG);
21964 emit_move_insn (al, callarg2);
21965 use_reg (&use, al);
21966 }
21967
21968 if (ix86_cmodel == CM_LARGE_PIC
21969 && MEM_P (fnaddr)
21970 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21971 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21972 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21973 else if (sibcall
21974 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21975 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21976 {
21977 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
21978 fnaddr = gen_rtx_MEM (QImode, fnaddr);
21979 }
21980
21981 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21982 if (retval)
21983 call = gen_rtx_SET (VOIDmode, retval, call);
21984 if (pop)
21985 {
21986 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21987 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21988 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21989 }
21990 if (TARGET_64BIT_MS_ABI
21991 && (!callarg2 || INTVAL (callarg2) != -2))
21992 {
21993 /* We need to represent that SI and DI registers are clobbered
21994 by SYSV calls. */
21995 static int clobbered_registers[] = {
21996 XMM6_REG, XMM7_REG, XMM8_REG,
21997 XMM9_REG, XMM10_REG, XMM11_REG,
21998 XMM12_REG, XMM13_REG, XMM14_REG,
21999 XMM15_REG, SI_REG, DI_REG
22000 };
22001 unsigned int i;
22002 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
22003 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
22004 UNSPEC_MS_TO_SYSV_CALL);
22005
22006 vec[0] = call;
22007 vec[1] = unspec;
22008 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
22009 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
22010 ? TImode : DImode,
22011 gen_rtx_REG
22012 (SSE_REGNO_P (clobbered_registers[i])
22013 ? TImode : DImode,
22014 clobbered_registers[i]));
22015
22016 call = gen_rtx_PARALLEL (VOIDmode,
22017 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
22018 + 2, vec));
22019 }
22020
22021 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
22022 if (TARGET_VZEROUPPER)
22023 {
22024 rtx unspec;
22025 int avx256;
22026
22027 if (cfun->machine->callee_pass_avx256_p)
22028 {
22029 if (cfun->machine->callee_return_avx256_p)
22030 avx256 = callee_return_pass_avx256;
22031 else
22032 avx256 = callee_pass_avx256;
22033 }
22034 else if (cfun->machine->callee_return_avx256_p)
22035 avx256 = callee_return_avx256;
22036 else
22037 avx256 = call_no_avx256;
22038
22039 if (reload_completed)
22040 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
22041 else
22042 {
22043 unspec = gen_rtx_UNSPEC (VOIDmode,
22044 gen_rtvec (1, GEN_INT (avx256)),
22045 UNSPEC_CALL_NEEDS_VZEROUPPER);
22046 call = gen_rtx_PARALLEL (VOIDmode,
22047 gen_rtvec (2, call, unspec));
22048 }
22049 }
22050
22051 call = emit_call_insn (call);
22052 if (use)
22053 CALL_INSN_FUNCTION_USAGE (call) = use;
22054
22055 return call;
22056 }
22057
22058 void
22059 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
22060 {
22061 rtx call = XVECEXP (PATTERN (insn), 0, 0);
22062 emit_insn (gen_avx_vzeroupper (vzeroupper));
22063 emit_call_insn (call);
22064 }
22065
22066 /* Output the assembly for a call instruction. */
22067
22068 const char *
22069 ix86_output_call_insn (rtx insn, rtx call_op, int addr_op)
22070 {
22071 bool direct_p = constant_call_address_operand (call_op, Pmode);
22072 bool seh_nop_p = false;
22073
22074 gcc_assert (addr_op == 0 || addr_op == 1);
22075
22076 if (SIBLING_CALL_P (insn))
22077 {
22078 if (direct_p)
22079 return addr_op ? "jmp\t%P1" : "jmp\t%P0";
22080 /* SEH epilogue detection requires the indirect branch case
22081 to include REX.W. */
22082 else if (TARGET_SEH)
22083 return addr_op ? "rex.W jmp %A1" : "rex.W jmp %A0";
22084 else
22085 return addr_op ? "jmp\t%A1" : "jmp\t%A0";
22086 }
22087
22088 /* SEH unwinding can require an extra nop to be emitted in several
22089 circumstances. Determine if we have one of those. */
22090 if (TARGET_SEH)
22091 {
22092 rtx i;
22093
22094 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
22095 {
22096 /* If we get to another real insn, we don't need the nop. */
22097 if (INSN_P (i))
22098 break;
22099
22100 	  /* If we get to the epilogue note, prevent a catch region from
22101 	     being adjacent to the standard epilogue sequence.  If non-call
22102 	     exceptions are enabled, we'll have done this during epilogue emission.  */
22103 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
22104 && !flag_non_call_exceptions
22105 && !can_throw_internal (insn))
22106 {
22107 seh_nop_p = true;
22108 break;
22109 }
22110 }
22111
22112 /* If we didn't find a real insn following the call, prevent the
22113 unwinder from looking into the next function. */
22114 if (i == NULL)
22115 seh_nop_p = true;
22116 }
22117
22118 if (direct_p)
22119 {
22120 if (seh_nop_p)
22121 return addr_op ? "call\t%P1\n\tnop" : "call\t%P0\n\tnop";
22122 else
22123 return addr_op ? "call\t%P1" : "call\t%P0";
22124 }
22125 else
22126 {
22127 if (seh_nop_p)
22128 return addr_op ? "call\t%A1\n\tnop" : "call\t%A0\n\tnop";
22129 else
22130 return addr_op ? "call\t%A1" : "call\t%A0";
22131 }
22132 }
22133 \f
22134 /* Clear stack slot assignments remembered from previous functions.
22135 This is called from INIT_EXPANDERS once before RTL is emitted for each
22136 function. */
22137
22138 static struct machine_function *
22139 ix86_init_machine_status (void)
22140 {
22141 struct machine_function *f;
22142
22143 f = ggc_alloc_cleared_machine_function ();
22144 f->use_fast_prologue_epilogue_nregs = -1;
22145 f->tls_descriptor_call_expanded_p = 0;
22146 f->call_abi = ix86_abi;
22147
22148 return f;
22149 }
22150
22151 /* Return a MEM corresponding to a stack slot with mode MODE.
22152 Allocate a new slot if necessary.
22153
22154 The RTL for a function can have several slots available: N is
22155 which slot to use. */
22156
22157 rtx
22158 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
22159 {
22160 struct stack_local_entry *s;
22161
22162 gcc_assert (n < MAX_386_STACK_LOCALS);
22163
22164 /* Virtual slot is valid only before vregs are instantiated. */
22165 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
22166
22167 for (s = ix86_stack_locals; s; s = s->next)
22168 if (s->mode == mode && s->n == n)
22169 return copy_rtx (s->rtl);
22170
22171 s = ggc_alloc_stack_local_entry ();
22172 s->n = n;
22173 s->mode = mode;
22174 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
22175
22176 s->next = ix86_stack_locals;
22177 ix86_stack_locals = s;
22178 return s->rtl;
22179 }
22180
22181 /* Construct the SYMBOL_REF for the tls_get_addr function. */
22182
22183 static GTY(()) rtx ix86_tls_symbol;
22184 rtx
22185 ix86_tls_get_addr (void)
22186 {
22187
22188 if (!ix86_tls_symbol)
22189 {
22190 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
22191 (TARGET_ANY_GNU_TLS
22192 && !TARGET_64BIT)
22193 ? "___tls_get_addr"
22194 : "__tls_get_addr");
22195 }
22196
22197 return ix86_tls_symbol;
22198 }
22199
22200 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
22201
22202 static GTY(()) rtx ix86_tls_module_base_symbol;
22203 rtx
22204 ix86_tls_module_base (void)
22205 {
22206
22207 if (!ix86_tls_module_base_symbol)
22208 {
22209 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
22210 "_TLS_MODULE_BASE_");
22211 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
22212 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
22213 }
22214
22215 return ix86_tls_module_base_symbol;
22216 }
22217 \f
22218 /* Calculate the length of the memory address in the instruction
22219 encoding. Does not include the one-byte modrm, opcode, or prefix. */
22220
22221 int
22222 memory_address_length (rtx addr)
22223 {
22224 struct ix86_address parts;
22225 rtx base, index, disp;
22226 int len;
22227 int ok;
22228
22229 if (GET_CODE (addr) == PRE_DEC
22230 || GET_CODE (addr) == POST_INC
22231 || GET_CODE (addr) == PRE_MODIFY
22232 || GET_CODE (addr) == POST_MODIFY)
22233 return 0;
22234
22235 ok = ix86_decompose_address (addr, &parts);
22236 gcc_assert (ok);
22237
22238 if (parts.base && GET_CODE (parts.base) == SUBREG)
22239 parts.base = SUBREG_REG (parts.base);
22240 if (parts.index && GET_CODE (parts.index) == SUBREG)
22241 parts.index = SUBREG_REG (parts.index);
22242
22243 base = parts.base;
22244 index = parts.index;
22245 disp = parts.disp;
22246 len = 0;
22247
22248 /* Rule of thumb:
22249 - esp as the base always wants an index,
22250 - ebp as the base always wants a displacement,
22251 - r12 as the base always wants an index,
22252 - r13 as the base always wants a displacement. */
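  /* For instance (illustrative, not exhaustive): plain (%esp) cannot be
     encoded with a bare modrm byte and needs a SIB byte, and plain (%ebp)
     has to be encoded as 0(%ebp) with a one-byte displacement; r12 and r13
     behave the same way, respectively, in 64-bit code.  */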
22253
22254 /* Register Indirect. */
22255 if (base && !index && !disp)
22256 {
22257 /* esp (for its index) and ebp (for its displacement) need
22258 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
22259 code. */
22260 if (REG_P (addr)
22261 && (addr == arg_pointer_rtx
22262 || addr == frame_pointer_rtx
22263 || REGNO (addr) == SP_REG
22264 || REGNO (addr) == BP_REG
22265 || REGNO (addr) == R12_REG
22266 || REGNO (addr) == R13_REG))
22267 len = 1;
22268 }
22269
22270   /* Direct Addressing.  In 64-bit mode, mod 00 r/m 5
22271      is not disp32 but disp32(%rip), so a SIB byte is needed
22272      for plain disp32, unless print_operand_address
22273      optimizes it into disp32(%rip) or (%rip) is implied
22274      by an UNSPEC.  */
22275 else if (disp && !base && !index)
22276 {
22277 len = 4;
22278 if (TARGET_64BIT)
22279 {
22280 rtx symbol = disp;
22281
22282 if (GET_CODE (disp) == CONST)
22283 symbol = XEXP (disp, 0);
22284 if (GET_CODE (symbol) == PLUS
22285 && CONST_INT_P (XEXP (symbol, 1)))
22286 symbol = XEXP (symbol, 0);
22287
22288 if (GET_CODE (symbol) != LABEL_REF
22289 && (GET_CODE (symbol) != SYMBOL_REF
22290 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
22291 && (GET_CODE (symbol) != UNSPEC
22292 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
22293 && XINT (symbol, 1) != UNSPEC_PCREL
22294 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
22295 len += 1;
22296 }
22297 }
22298
22299 else
22300 {
22301 /* Find the length of the displacement constant. */
22302 if (disp)
22303 {
22304 if (base && satisfies_constraint_K (disp))
22305 len = 1;
22306 else
22307 len = 4;
22308 }
22309 /* ebp always wants a displacement. Similarly r13. */
22310 else if (base && REG_P (base)
22311 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
22312 len = 1;
22313
22314 /* An index requires the two-byte modrm form.... */
22315 if (index
22316 /* ...like esp (or r12), which always wants an index. */
22317 || base == arg_pointer_rtx
22318 || base == frame_pointer_rtx
22319 || (base && REG_P (base)
22320 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
22321 len += 1;
22322 }
22323
22324 switch (parts.seg)
22325 {
22326 case SEG_FS:
22327 case SEG_GS:
22328 len += 1;
22329 break;
22330 default:
22331 break;
22332 }
22333
22334 return len;
22335 }
22336
22337 /* Compute the default value for the "length_immediate" attribute.  When
22338    SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
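/* For example (illustrative): in "addl $3, %eax" the immediate fits the
   short form and counts as 1 byte, while "addl $1000, %eax" needs a full
   4-byte immediate; DImode immediates also count as 4 bytes because they
   are encoded as 32-bit sign-extended values.  */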
22339 int
22340 ix86_attr_length_immediate_default (rtx insn, int shortform)
22341 {
22342 int len = 0;
22343 int i;
22344 extract_insn_cached (insn);
22345 for (i = recog_data.n_operands - 1; i >= 0; --i)
22346 if (CONSTANT_P (recog_data.operand[i]))
22347 {
22348 enum attr_mode mode = get_attr_mode (insn);
22349
22350 gcc_assert (!len);
22351 if (shortform && CONST_INT_P (recog_data.operand[i]))
22352 {
22353 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22354 switch (mode)
22355 {
22356 case MODE_QI:
22357 len = 1;
22358 continue;
22359 case MODE_HI:
22360 ival = trunc_int_for_mode (ival, HImode);
22361 break;
22362 case MODE_SI:
22363 ival = trunc_int_for_mode (ival, SImode);
22364 break;
22365 default:
22366 break;
22367 }
22368 if (IN_RANGE (ival, -128, 127))
22369 {
22370 len = 1;
22371 continue;
22372 }
22373 }
22374 switch (mode)
22375 {
22376 case MODE_QI:
22377 len = 1;
22378 break;
22379 case MODE_HI:
22380 len = 2;
22381 break;
22382 case MODE_SI:
22383 len = 4;
22384 break;
22385 	  /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
22386 case MODE_DI:
22387 len = 4;
22388 break;
22389 default:
22390 fatal_insn ("unknown insn mode", insn);
22391 }
22392 }
22393 return len;
22394 }
22395 /* Compute default value for "length_address" attribute. */
22396 int
22397 ix86_attr_length_address_default (rtx insn)
22398 {
22399 int i;
22400
22401 if (get_attr_type (insn) == TYPE_LEA)
22402 {
22403 rtx set = PATTERN (insn), addr;
22404
22405 if (GET_CODE (set) == PARALLEL)
22406 set = XVECEXP (set, 0, 0);
22407
22408 gcc_assert (GET_CODE (set) == SET);
22409
22410 addr = SET_SRC (set);
22411 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22412 {
22413 if (GET_CODE (addr) == ZERO_EXTEND)
22414 addr = XEXP (addr, 0);
22415 if (GET_CODE (addr) == SUBREG)
22416 addr = SUBREG_REG (addr);
22417 }
22418
22419 return memory_address_length (addr);
22420 }
22421
22422 extract_insn_cached (insn);
22423 for (i = recog_data.n_operands - 1; i >= 0; --i)
22424 if (MEM_P (recog_data.operand[i]))
22425 {
22426 constrain_operands_cached (reload_completed);
22427 if (which_alternative != -1)
22428 {
22429 const char *constraints = recog_data.constraints[i];
22430 int alt = which_alternative;
22431
22432 while (*constraints == '=' || *constraints == '+')
22433 constraints++;
22434 while (alt-- > 0)
22435 while (*constraints++ != ',')
22436 ;
22437 /* Skip ignored operands. */
22438 if (*constraints == 'X')
22439 continue;
22440 }
22441 return memory_address_length (XEXP (recog_data.operand[i], 0));
22442 }
22443 return 0;
22444 }
22445
22446 /* Compute the default value for the "length_vex" attribute.  It includes
22447    the 2- or 3-byte VEX prefix and 1 opcode byte.  */
22448
22449 int
22450 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
22451 int has_vex_w)
22452 {
22453 int i;
22454
22455   /* Only the 0f opcode map can use the 2-byte VEX prefix; setting the
22456      VEX.W bit requires the 3-byte VEX prefix.  */
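  /* For example (illustrative): vaddps %xmm1, %xmm2, %xmm0 fits in the
     2-byte (C5) VEX form, whereas an insn that needs VEX.W or REX.X/REX.B
     (e.g. an extended register in a memory address), or one outside the
     0f opcode map, must use the 3-byte (C4) form.  */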
22457 if (!has_0f_opcode || has_vex_w)
22458 return 3 + 1;
22459
22460   /* We can always use the 2-byte VEX prefix in 32-bit code.  */
22461 if (!TARGET_64BIT)
22462 return 2 + 1;
22463
22464 extract_insn_cached (insn);
22465
22466 for (i = recog_data.n_operands - 1; i >= 0; --i)
22467 if (REG_P (recog_data.operand[i]))
22468 {
22469 /* REX.W bit uses 3 byte VEX prefix. */
22470 if (GET_MODE (recog_data.operand[i]) == DImode
22471 && GENERAL_REG_P (recog_data.operand[i]))
22472 return 3 + 1;
22473 }
22474 else
22475 {
22476 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22477 if (MEM_P (recog_data.operand[i])
22478 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
22479 return 3 + 1;
22480 }
22481
22482 return 2 + 1;
22483 }
22484 \f
22485 /* Return the maximum number of instructions a cpu can issue. */
22486
22487 static int
22488 ix86_issue_rate (void)
22489 {
22490 switch (ix86_tune)
22491 {
22492 case PROCESSOR_PENTIUM:
22493 case PROCESSOR_ATOM:
22494 case PROCESSOR_K6:
22495 return 2;
22496
22497 case PROCESSOR_PENTIUMPRO:
22498 case PROCESSOR_PENTIUM4:
22499 case PROCESSOR_CORE2_32:
22500 case PROCESSOR_CORE2_64:
22501 case PROCESSOR_COREI7_32:
22502 case PROCESSOR_COREI7_64:
22503 case PROCESSOR_ATHLON:
22504 case PROCESSOR_K8:
22505 case PROCESSOR_AMDFAM10:
22506 case PROCESSOR_NOCONA:
22507 case PROCESSOR_GENERIC32:
22508 case PROCESSOR_GENERIC64:
22509 case PROCESSOR_BDVER1:
22510 case PROCESSOR_BTVER1:
22511 return 3;
22512
22513 default:
22514 return 1;
22515 }
22516 }
22517
22518 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
22519    by DEP_INSN and nothing else set by DEP_INSN.  */
22520
22521 static int
22522 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22523 {
22524 rtx set, set2;
22525
22526 /* Simplify the test for uninteresting insns. */
22527 if (insn_type != TYPE_SETCC
22528 && insn_type != TYPE_ICMOV
22529 && insn_type != TYPE_FCMOV
22530 && insn_type != TYPE_IBR)
22531 return 0;
22532
22533 if ((set = single_set (dep_insn)) != 0)
22534 {
22535 set = SET_DEST (set);
22536 set2 = NULL_RTX;
22537 }
22538 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22539 && XVECLEN (PATTERN (dep_insn), 0) == 2
22540 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22541 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22542 {
22543 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22544       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22545 }
22546 else
22547 return 0;
22548
22549 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22550 return 0;
22551
22552 /* This test is true if the dependent insn reads the flags but
22553 not any other potentially set register. */
22554 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22555 return 0;
22556
22557 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22558 return 0;
22559
22560 return 1;
22561 }
22562
22563 /* Return true iff USE_INSN has a memory address with operands set by
22564 SET_INSN. */
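/* For example (illustrative): with SET_INSN "addl $4, %ebx" and USE_INSN
   "movl (%ebx), %eax", the load's address uses a register modified by
   SET_INSN, so the two instructions are AGI-dependent.  */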
22565
22566 bool
22567 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22568 {
22569 int i;
22570 extract_insn_cached (use_insn);
22571 for (i = recog_data.n_operands - 1; i >= 0; --i)
22572 if (MEM_P (recog_data.operand[i]))
22573 {
22574 rtx addr = XEXP (recog_data.operand[i], 0);
22575 return modified_in_p (addr, set_insn) != 0;
22576 }
22577 return false;
22578 }
22579
22580 static int
22581 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22582 {
22583 enum attr_type insn_type, dep_insn_type;
22584 enum attr_memory memory;
22585 rtx set, set2;
22586 int dep_insn_code_number;
22587
22588 /* Anti and output dependencies have zero cost on all CPUs. */
22589 if (REG_NOTE_KIND (link) != 0)
22590 return 0;
22591
22592 dep_insn_code_number = recog_memoized (dep_insn);
22593
22594 /* If we can't recognize the insns, we can't really do anything. */
22595 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22596 return cost;
22597
22598 insn_type = get_attr_type (insn);
22599 dep_insn_type = get_attr_type (dep_insn);
22600
22601 switch (ix86_tune)
22602 {
22603 case PROCESSOR_PENTIUM:
22604 /* Address Generation Interlock adds a cycle of latency. */
22605 if (insn_type == TYPE_LEA)
22606 {
22607 rtx addr = PATTERN (insn);
22608
22609 if (GET_CODE (addr) == PARALLEL)
22610 addr = XVECEXP (addr, 0, 0);
22611
22612 gcc_assert (GET_CODE (addr) == SET);
22613
22614 addr = SET_SRC (addr);
22615 if (modified_in_p (addr, dep_insn))
22616 cost += 1;
22617 }
22618 else if (ix86_agi_dependent (dep_insn, insn))
22619 cost += 1;
22620
22621 /* ??? Compares pair with jump/setcc. */
22622 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22623 cost = 0;
22624
22625 /* Floating point stores require value to be ready one cycle earlier. */
22626 if (insn_type == TYPE_FMOV
22627 && get_attr_memory (insn) == MEMORY_STORE
22628 && !ix86_agi_dependent (dep_insn, insn))
22629 cost += 1;
22630 break;
22631
22632 case PROCESSOR_PENTIUMPRO:
22633 memory = get_attr_memory (insn);
22634
22635 /* INT->FP conversion is expensive. */
22636 if (get_attr_fp_int_src (dep_insn))
22637 cost += 5;
22638
22639 /* There is one cycle extra latency between an FP op and a store. */
22640 if (insn_type == TYPE_FMOV
22641 && (set = single_set (dep_insn)) != NULL_RTX
22642 && (set2 = single_set (insn)) != NULL_RTX
22643 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22644 && MEM_P (SET_DEST (set2)))
22645 cost += 1;
22646
22647       /* Show the ability of the reorder buffer to hide the latency of a load
22648 	 by executing it in parallel with the previous instruction when the
22649 	 previous instruction is not needed to compute the address.  */
22650 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22651 && !ix86_agi_dependent (dep_insn, insn))
22652 {
22653 	  /* Claim moves to take one cycle, as the core can issue one load
22654 	     at a time and the next load can start a cycle later.  */
22655 if (dep_insn_type == TYPE_IMOV
22656 || dep_insn_type == TYPE_FMOV)
22657 cost = 1;
22658 else if (cost > 1)
22659 cost--;
22660 }
22661 break;
22662
22663 case PROCESSOR_K6:
22664 memory = get_attr_memory (insn);
22665
22666 /* The esp dependency is resolved before the instruction is really
22667 finished. */
22668 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22669 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22670 return 1;
22671
22672 /* INT->FP conversion is expensive. */
22673 if (get_attr_fp_int_src (dep_insn))
22674 cost += 5;
22675
22676       /* Show the ability of the reorder buffer to hide the latency of a load
22677 	 by executing it in parallel with the previous instruction when the
22678 	 previous instruction is not needed to compute the address.  */
22679 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22680 && !ix86_agi_dependent (dep_insn, insn))
22681 {
22682 	  /* Claim moves to take one cycle, as the core can issue one load
22683 	     at a time and the next load can start a cycle later.  */
22684 if (dep_insn_type == TYPE_IMOV
22685 || dep_insn_type == TYPE_FMOV)
22686 cost = 1;
22687 else if (cost > 2)
22688 cost -= 2;
22689 else
22690 cost = 1;
22691 }
22692 break;
22693
22694 case PROCESSOR_ATHLON:
22695 case PROCESSOR_K8:
22696 case PROCESSOR_AMDFAM10:
22697 case PROCESSOR_BDVER1:
22698 case PROCESSOR_BTVER1:
22699 case PROCESSOR_ATOM:
22700 case PROCESSOR_GENERIC32:
22701 case PROCESSOR_GENERIC64:
22702 memory = get_attr_memory (insn);
22703
22704       /* Show the ability of the reorder buffer to hide the latency of a load
22705 	 by executing it in parallel with the previous instruction when the
22706 	 previous instruction is not needed to compute the address.  */
22707 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22708 && !ix86_agi_dependent (dep_insn, insn))
22709 {
22710 enum attr_unit unit = get_attr_unit (insn);
22711 int loadcost = 3;
22712
22713 	  /* Because of the difference in length between the integer and
22714 	     floating-point unit pipeline preparation stages, memory operands
22715 	     for floating point are cheaper.
22716 
22717 	     ??? For Athlon the difference is most probably 2.  */
22718 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22719 loadcost = 3;
22720 else
22721 loadcost = TARGET_ATHLON ? 2 : 0;
22722
22723 if (cost >= loadcost)
22724 cost -= loadcost;
22725 else
22726 cost = 0;
22727 }
22728
22729 default:
22730 break;
22731 }
22732
22733 return cost;
22734 }
22735
22736 /* How many alternative schedules to try.  This should be as wide as the
22737    scheduling freedom in the DFA, but no wider.  Making this value too
22738    large results in extra work for the scheduler.  */
22739
22740 static int
22741 ia32_multipass_dfa_lookahead (void)
22742 {
22743 switch (ix86_tune)
22744 {
22745 case PROCESSOR_PENTIUM:
22746 return 2;
22747
22748 case PROCESSOR_PENTIUMPRO:
22749 case PROCESSOR_K6:
22750 return 1;
22751
22752 case PROCESSOR_CORE2_32:
22753 case PROCESSOR_CORE2_64:
22754 case PROCESSOR_COREI7_32:
22755 case PROCESSOR_COREI7_64:
22756       /* Generally, we want haifa-sched:max_issue() to look ahead as far as
22757 	 the number of instructions that can be executed in one cycle, i.e.,
22758 	 issue_rate.  It is unclear why the tunings for many CPUs do not do this.  */
22759 return ix86_issue_rate ();
22760
22761 default:
22762 return 0;
22763 }
22764 }
22765
22766 \f
22767
22768 /* Model the decoder of Core 2/i7.
22769    The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
22770    track instruction fetch block boundaries and make sure that long
22771    (9+ byte) instructions are assigned to decoder D0.  */
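/* As a rough illustration of the model (using the Core 2/i7 parameters set
   in ix86_sched_init_global below): the decoders consume a 16-byte fetch
   block, at most 6 instructions of it per cycle, and an instruction longer
   than 8 bytes is only allowed as the first instruction issued in a cycle,
   so such an instruction that is not scheduled first has to wait.  */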
22772
22773 /* Maximum length of an insn that can be handled by
22774 a secondary decoder unit. '8' for Core 2/i7. */
22775 static int core2i7_secondary_decoder_max_insn_size;
22776
22777 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22778 '16' for Core 2/i7. */
22779 static int core2i7_ifetch_block_size;
22780
22781 /* Maximum number of instructions decoder can handle per cycle.
22782 '6' for Core 2/i7. */
22783 static int core2i7_ifetch_block_max_insns;
22784
22785 typedef struct ix86_first_cycle_multipass_data_ *
22786 ix86_first_cycle_multipass_data_t;
22787 typedef const struct ix86_first_cycle_multipass_data_ *
22788 const_ix86_first_cycle_multipass_data_t;
22789
22790 /* A variable to store target state across calls to max_issue within
22791 one cycle. */
22792 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22793 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22794
22795 /* Initialize DATA. */
22796 static void
22797 core2i7_first_cycle_multipass_init (void *_data)
22798 {
22799 ix86_first_cycle_multipass_data_t data
22800 = (ix86_first_cycle_multipass_data_t) _data;
22801
22802 data->ifetch_block_len = 0;
22803 data->ifetch_block_n_insns = 0;
22804 data->ready_try_change = NULL;
22805 data->ready_try_change_size = 0;
22806 }
22807
22808 /* Advancing the cycle; reset ifetch block counts. */
22809 static void
22810 core2i7_dfa_post_advance_cycle (void)
22811 {
22812 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22813
22814 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22815
22816 data->ifetch_block_len = 0;
22817 data->ifetch_block_n_insns = 0;
22818 }
22819
22820 static int min_insn_size (rtx);
22821
22822 /* Filter out insns from ready_try that the core will not be able to issue
22823    on the current cycle due to decoder restrictions.  */
22824 static void
22825 core2i7_first_cycle_multipass_filter_ready_try
22826 (const_ix86_first_cycle_multipass_data_t data,
22827 char *ready_try, int n_ready, bool first_cycle_insn_p)
22828 {
22829 while (n_ready--)
22830 {
22831 rtx insn;
22832 int insn_size;
22833
22834 if (ready_try[n_ready])
22835 continue;
22836
22837 insn = get_ready_element (n_ready);
22838 insn_size = min_insn_size (insn);
22839
22840       if (/* If this insn is too long for a secondary decoder ...  */
22841 (!first_cycle_insn_p
22842 && insn_size > core2i7_secondary_decoder_max_insn_size)
22843 /* ... or it would not fit into the ifetch block ... */
22844 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22845 /* ... or the decoder is full already ... */
22846 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22847 /* ... mask the insn out. */
22848 {
22849 ready_try[n_ready] = 1;
22850
22851 if (data->ready_try_change)
22852 SET_BIT (data->ready_try_change, n_ready);
22853 }
22854 }
22855 }
22856
22857 /* Prepare for a new round of multipass lookahead scheduling. */
22858 static void
22859 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22860 bool first_cycle_insn_p)
22861 {
22862 ix86_first_cycle_multipass_data_t data
22863 = (ix86_first_cycle_multipass_data_t) _data;
22864 const_ix86_first_cycle_multipass_data_t prev_data
22865 = ix86_first_cycle_multipass_data;
22866
22867 /* Restore the state from the end of the previous round. */
22868 data->ifetch_block_len = prev_data->ifetch_block_len;
22869 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22870
22871 /* Filter instructions that cannot be issued on current cycle due to
22872 decoder restrictions. */
22873 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22874 first_cycle_insn_p);
22875 }
22876
22877 /* INSN is being issued in current solution. Account for its impact on
22878 the decoder model. */
22879 static void
22880 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22881 rtx insn, const void *_prev_data)
22882 {
22883 ix86_first_cycle_multipass_data_t data
22884 = (ix86_first_cycle_multipass_data_t) _data;
22885 const_ix86_first_cycle_multipass_data_t prev_data
22886 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22887
22888 int insn_size = min_insn_size (insn);
22889
22890 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22891 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22892 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22893 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22894
22895 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22896 if (!data->ready_try_change)
22897 {
22898 data->ready_try_change = sbitmap_alloc (n_ready);
22899 data->ready_try_change_size = n_ready;
22900 }
22901 else if (data->ready_try_change_size < n_ready)
22902 {
22903 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22904 n_ready, 0);
22905 data->ready_try_change_size = n_ready;
22906 }
22907 sbitmap_zero (data->ready_try_change);
22908
22909   /* Filter out insns from ready_try that the core will not be able to issue
22910      on the current cycle due to decoder restrictions.  */
22911 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22912 false);
22913 }
22914
22915 /* Revert the effect on ready_try. */
22916 static void
22917 core2i7_first_cycle_multipass_backtrack (const void *_data,
22918 char *ready_try,
22919 int n_ready ATTRIBUTE_UNUSED)
22920 {
22921 const_ix86_first_cycle_multipass_data_t data
22922 = (const_ix86_first_cycle_multipass_data_t) _data;
22923 unsigned int i = 0;
22924 sbitmap_iterator sbi;
22925
22926 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22927 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22928 {
22929 ready_try[i] = 0;
22930 }
22931 }
22932
22933 /* Save the result of multipass lookahead scheduling for the next round. */
22934 static void
22935 core2i7_first_cycle_multipass_end (const void *_data)
22936 {
22937 const_ix86_first_cycle_multipass_data_t data
22938 = (const_ix86_first_cycle_multipass_data_t) _data;
22939 ix86_first_cycle_multipass_data_t next_data
22940 = ix86_first_cycle_multipass_data;
22941
22942 if (data != NULL)
22943 {
22944 next_data->ifetch_block_len = data->ifetch_block_len;
22945 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22946 }
22947 }
22948
22949 /* Deallocate target data. */
22950 static void
22951 core2i7_first_cycle_multipass_fini (void *_data)
22952 {
22953 ix86_first_cycle_multipass_data_t data
22954 = (ix86_first_cycle_multipass_data_t) _data;
22955
22956 if (data->ready_try_change)
22957 {
22958 sbitmap_free (data->ready_try_change);
22959 data->ready_try_change = NULL;
22960 data->ready_try_change_size = 0;
22961 }
22962 }
22963
22964 /* Prepare for scheduling pass. */
22965 static void
22966 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22967 int verbose ATTRIBUTE_UNUSED,
22968 int max_uid ATTRIBUTE_UNUSED)
22969 {
22970 /* Install scheduling hooks for current CPU. Some of these hooks are used
22971 in time-critical parts of the scheduler, so we only set them up when
22972 they are actually used. */
22973 switch (ix86_tune)
22974 {
22975 case PROCESSOR_CORE2_32:
22976 case PROCESSOR_CORE2_64:
22977 case PROCESSOR_COREI7_32:
22978 case PROCESSOR_COREI7_64:
22979 targetm.sched.dfa_post_advance_cycle
22980 = core2i7_dfa_post_advance_cycle;
22981 targetm.sched.first_cycle_multipass_init
22982 = core2i7_first_cycle_multipass_init;
22983 targetm.sched.first_cycle_multipass_begin
22984 = core2i7_first_cycle_multipass_begin;
22985 targetm.sched.first_cycle_multipass_issue
22986 = core2i7_first_cycle_multipass_issue;
22987 targetm.sched.first_cycle_multipass_backtrack
22988 = core2i7_first_cycle_multipass_backtrack;
22989 targetm.sched.first_cycle_multipass_end
22990 = core2i7_first_cycle_multipass_end;
22991 targetm.sched.first_cycle_multipass_fini
22992 = core2i7_first_cycle_multipass_fini;
22993
22994 /* Set decoder parameters. */
22995 core2i7_secondary_decoder_max_insn_size = 8;
22996 core2i7_ifetch_block_size = 16;
22997 core2i7_ifetch_block_max_insns = 6;
22998 break;
22999
23000 default:
23001 targetm.sched.dfa_post_advance_cycle = NULL;
23002 targetm.sched.first_cycle_multipass_init = NULL;
23003 targetm.sched.first_cycle_multipass_begin = NULL;
23004 targetm.sched.first_cycle_multipass_issue = NULL;
23005 targetm.sched.first_cycle_multipass_backtrack = NULL;
23006 targetm.sched.first_cycle_multipass_end = NULL;
23007 targetm.sched.first_cycle_multipass_fini = NULL;
23008 break;
23009 }
23010 }
23011
23012 \f
23013 /* Compute the alignment given to a constant that is being placed in memory.
23014 EXP is the constant and ALIGN is the alignment that the object would
23015 ordinarily have.
23016 The value of this function is used instead of that alignment to align
23017 the object. */
23018
23019 int
23020 ix86_constant_alignment (tree exp, int align)
23021 {
23022 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
23023 || TREE_CODE (exp) == INTEGER_CST)
23024 {
23025 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
23026 return 64;
23027 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
23028 return 128;
23029 }
23030 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
23031 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
23032 return BITS_PER_WORD;
23033
23034 return align;
23035 }
23036
23037 /* Compute the alignment for a static variable.
23038 TYPE is the data type, and ALIGN is the alignment that
23039 the object would ordinarily have. The value of this function is used
23040 instead of that alignment to align the object. */
23041
23042 int
23043 ix86_data_alignment (tree type, int align)
23044 {
23045 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
23046
23047 if (AGGREGATE_TYPE_P (type)
23048 && TYPE_SIZE (type)
23049 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23050 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
23051 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
23052 && align < max_align)
23053 align = max_align;
23054
23055   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
23056      to a 16-byte boundary.  */
23057 if (TARGET_64BIT)
23058 {
23059 if (AGGREGATE_TYPE_P (type)
23060 && TYPE_SIZE (type)
23061 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23062 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
23063 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23064 return 128;
23065 }
23066
23067 if (TREE_CODE (type) == ARRAY_TYPE)
23068 {
23069 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23070 return 64;
23071 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23072 return 128;
23073 }
23074 else if (TREE_CODE (type) == COMPLEX_TYPE)
23075 {
23076
23077 if (TYPE_MODE (type) == DCmode && align < 64)
23078 return 64;
23079 if ((TYPE_MODE (type) == XCmode
23080 || TYPE_MODE (type) == TCmode) && align < 128)
23081 return 128;
23082 }
23083 else if ((TREE_CODE (type) == RECORD_TYPE
23084 || TREE_CODE (type) == UNION_TYPE
23085 || TREE_CODE (type) == QUAL_UNION_TYPE)
23086 && TYPE_FIELDS (type))
23087 {
23088 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23089 return 64;
23090 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23091 return 128;
23092 }
23093 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23094 || TREE_CODE (type) == INTEGER_TYPE)
23095 {
23096 if (TYPE_MODE (type) == DFmode && align < 64)
23097 return 64;
23098 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23099 return 128;
23100 }
23101
23102 return align;
23103 }
23104
23105 /* Compute the alignment for a local variable or a stack slot. EXP is
23106 the data type or decl itself, MODE is the widest mode available and
23107 ALIGN is the alignment that the object would ordinarily have. The
23108 value of this macro is used instead of that alignment to align the
23109 object. */
23110
23111 unsigned int
23112 ix86_local_alignment (tree exp, enum machine_mode mode,
23113 unsigned int align)
23114 {
23115 tree type, decl;
23116
23117 if (exp && DECL_P (exp))
23118 {
23119 type = TREE_TYPE (exp);
23120 decl = exp;
23121 }
23122 else
23123 {
23124 type = exp;
23125 decl = NULL;
23126 }
23127
23128 /* Don't do dynamic stack realignment for long long objects with
23129 -mpreferred-stack-boundary=2. */
23130 if (!TARGET_64BIT
23131 && align == 64
23132 && ix86_preferred_stack_boundary < 64
23133 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
23134 && (!type || !TYPE_USER_ALIGN (type))
23135 && (!decl || !DECL_USER_ALIGN (decl)))
23136 align = 32;
23137
23138 /* If TYPE is NULL, we are allocating a stack slot for caller-save
23139 register in MODE. We will return the largest alignment of XF
23140 and DF. */
23141 if (!type)
23142 {
23143 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
23144 align = GET_MODE_ALIGNMENT (DFmode);
23145 return align;
23146 }
23147
23148   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
23149      to a 16-byte boundary.  The exact wording is:
23150 
23151 	An array uses the same alignment as its elements, except that a local or
23152 	global array variable of length at least 16 bytes or
23153 	a C99 variable-length array variable always has alignment of at least 16 bytes.
23154 
23155      This was added to allow the use of aligned SSE instructions on arrays.
23156      The rule is meant for static storage (where the compiler cannot do the
23157      analysis by itself).  We follow it for automatic variables only when
23158      convenient: we fully control everything in the function being compiled,
23159      and functions from other units cannot rely on the alignment.
23160 
23161      Exclude the va_list type.  It is the common case of a local array where
23162      we cannot benefit from the alignment.  */
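  /* For example (illustrative): on x86-64, when optimizing for speed with
     SSE enabled, a local array such as "char buf[32]" is given 16-byte
     alignment by the code below so that aligned SSE accesses can be used
     on it.  */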
23163 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
23164 && TARGET_SSE)
23165 {
23166 if (AGGREGATE_TYPE_P (type)
23167 && (va_list_type_node == NULL_TREE
23168 || (TYPE_MAIN_VARIANT (type)
23169 != TYPE_MAIN_VARIANT (va_list_type_node)))
23170 && TYPE_SIZE (type)
23171 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23172 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
23173 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23174 return 128;
23175 }
23176 if (TREE_CODE (type) == ARRAY_TYPE)
23177 {
23178 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23179 return 64;
23180 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23181 return 128;
23182 }
23183 else if (TREE_CODE (type) == COMPLEX_TYPE)
23184 {
23185 if (TYPE_MODE (type) == DCmode && align < 64)
23186 return 64;
23187 if ((TYPE_MODE (type) == XCmode
23188 || TYPE_MODE (type) == TCmode) && align < 128)
23189 return 128;
23190 }
23191 else if ((TREE_CODE (type) == RECORD_TYPE
23192 || TREE_CODE (type) == UNION_TYPE
23193 || TREE_CODE (type) == QUAL_UNION_TYPE)
23194 && TYPE_FIELDS (type))
23195 {
23196 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23197 return 64;
23198 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23199 return 128;
23200 }
23201 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23202 || TREE_CODE (type) == INTEGER_TYPE)
23203 {
23204
23205 if (TYPE_MODE (type) == DFmode && align < 64)
23206 return 64;
23207 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23208 return 128;
23209 }
23210 return align;
23211 }
23212
23213 /* Compute the minimum required alignment for dynamic stack realignment
23214 purposes for a local variable, parameter or a stack slot. EXP is
23215 the data type or decl itself, MODE is its mode and ALIGN is the
23216 alignment that the object would ordinarily have. */
23217
23218 unsigned int
23219 ix86_minimum_alignment (tree exp, enum machine_mode mode,
23220 unsigned int align)
23221 {
23222 tree type, decl;
23223
23224 if (exp && DECL_P (exp))
23225 {
23226 type = TREE_TYPE (exp);
23227 decl = exp;
23228 }
23229 else
23230 {
23231 type = exp;
23232 decl = NULL;
23233 }
23234
23235 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
23236 return align;
23237
23238 /* Don't do dynamic stack realignment for long long objects with
23239 -mpreferred-stack-boundary=2. */
23240 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
23241 && (!type || !TYPE_USER_ALIGN (type))
23242 && (!decl || !DECL_USER_ALIGN (decl)))
23243 return 32;
23244
23245 return align;
23246 }
23247 \f
23248 /* Find a location for the static chain incoming to a nested function.
23249 This is a register, unless all free registers are used by arguments. */
23250
23251 static rtx
23252 ix86_static_chain (const_tree fndecl, bool incoming_p)
23253 {
23254 unsigned regno;
23255
23256 if (!DECL_STATIC_CHAIN (fndecl))
23257 return NULL;
23258
23259 if (TARGET_64BIT)
23260 {
23261 /* We always use R10 in 64-bit mode. */
23262 regno = R10_REG;
23263 }
23264 else
23265 {
23266 tree fntype;
23267 unsigned int ccvt;
23268
23269 /* By default in 32-bit mode we use ECX to pass the static chain. */
23270 regno = CX_REG;
23271
23272 fntype = TREE_TYPE (fndecl);
23273 ccvt = ix86_get_callcvt (fntype);
23274 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
23275 {
23276 /* Fastcall functions use ecx/edx for arguments, which leaves
23277 us with EAX for the static chain.
23278 Thiscall functions use ecx for arguments, which also
23279 leaves us with EAX for the static chain. */
23280 regno = AX_REG;
23281 }
23282 else if (ix86_function_regparm (fntype, fndecl) == 3)
23283 {
23284 /* For regparm 3, we have no free call-clobbered registers in
23285 which to store the static chain. In order to implement this,
23286 we have the trampoline push the static chain to the stack.
23287 However, we can't push a value below the return address when
23288 we call the nested function directly, so we have to use an
23289 alternate entry point. For this we use ESI, and have the
23290 alternate entry point push ESI, so that things appear the
23291 same once we're executing the nested function. */
23292 if (incoming_p)
23293 {
23294 if (fndecl == current_function_decl)
23295 ix86_static_chain_on_stack = true;
23296 return gen_frame_mem (SImode,
23297 plus_constant (arg_pointer_rtx, -8));
23298 }
23299 regno = SI_REG;
23300 }
23301 }
23302
23303 return gen_rtx_REG (Pmode, regno);
23304 }
23305
23306 /* Emit RTL insns to initialize the variable parts of a trampoline.
23307 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23308 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23309 to be passed to the target function. */
23310
23311 static void
23312 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
23313 {
23314 rtx mem, fnaddr;
23315
23316 fnaddr = XEXP (DECL_RTL (fndecl), 0);
23317
23318 if (!TARGET_64BIT)
23319 {
23320 rtx disp, chain;
23321 int opcode;
23322
23323 /* Depending on the static chain location, either load a register
23324 with a constant, or push the constant to the stack. All of the
23325 instructions are the same size. */
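      /* The resulting 32-bit trampoline is, schematically (illustrative):
	    b9 <chain>    movl  $<chain>, %ecx   (or b8 for %eax, 68 for pushl)
	    e9 <disp32>   jmp   <function>
	 where <disp32> is relative to the end of the jmp instruction.  */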
23326 chain = ix86_static_chain (fndecl, true);
23327 if (REG_P (chain))
23328 {
23329 if (REGNO (chain) == CX_REG)
23330 opcode = 0xb9;
23331 else if (REGNO (chain) == AX_REG)
23332 opcode = 0xb8;
23333 else
23334 gcc_unreachable ();
23335 }
23336 else
23337 opcode = 0x68;
23338
23339 mem = adjust_address (m_tramp, QImode, 0);
23340 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23341
23342 mem = adjust_address (m_tramp, SImode, 1);
23343 emit_move_insn (mem, chain_value);
23344
23345 /* Compute offset from the end of the jmp to the target function.
23346 In the case in which the trampoline stores the static chain on
23347 the stack, we need to skip the first insn which pushes the
23348 (call-saved) register static chain; this push is 1 byte. */
23349 disp = expand_binop (SImode, sub_optab, fnaddr,
23350 plus_constant (XEXP (m_tramp, 0),
23351 MEM_P (chain) ? 9 : 10),
23352 NULL_RTX, 1, OPTAB_DIRECT);
23353
23354 mem = adjust_address (m_tramp, QImode, 5);
23355 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23356
23357 mem = adjust_address (m_tramp, SImode, 6);
23358 emit_move_insn (mem, disp);
23359 }
23360 else
23361 {
23362 int offset = 0;
23363
23364       /* Load the function address into r11.  Try to load the address using
23365 	 the shorter movl instead of movabs.  We may want to support movq for
23366 	 kernel mode, but the kernel does not use trampolines at the
23367 	 moment.  */
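      /* The generated 64-bit trampoline is, schematically (illustrative):
	    49 bb <imm64>   movabsq $<function>, %r11   (or 41 bb <imm32>, movl)
	    49 ba <imm64>   movabsq $<chain>, %r10
	    49 ff e3        jmpq    *%r11
	    90              nop (pads the final 32-bit store)  */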
23368 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23369 {
23370 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23371
23372 mem = adjust_address (m_tramp, HImode, offset);
23373 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23374
23375 mem = adjust_address (m_tramp, SImode, offset + 2);
23376 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23377 offset += 6;
23378 }
23379 else
23380 {
23381 mem = adjust_address (m_tramp, HImode, offset);
23382 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23383
23384 mem = adjust_address (m_tramp, DImode, offset + 2);
23385 emit_move_insn (mem, fnaddr);
23386 offset += 10;
23387 }
23388
23389 /* Load static chain using movabs to r10. */
23390 mem = adjust_address (m_tramp, HImode, offset);
23391 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
23392
23393 mem = adjust_address (m_tramp, DImode, offset + 2);
23394 emit_move_insn (mem, chain_value);
23395 offset += 10;
23396
23397 /* Jump to r11; the last (unused) byte is a nop, only there to
23398 pad the write out to a single 32-bit store. */
23399 mem = adjust_address (m_tramp, SImode, offset);
23400 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
23401 offset += 4;
23402
23403 gcc_assert (offset <= TRAMPOLINE_SIZE);
23404 }
23405
23406 #ifdef ENABLE_EXECUTE_STACK
23407 #ifdef CHECK_EXECUTE_STACK_ENABLED
23408 if (CHECK_EXECUTE_STACK_ENABLED)
23409 #endif
23410 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23411 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
23412 #endif
23413 }
23414 \f
23415 /* The following file contains several enumerations and data structures
23416 built from the definitions in i386-builtin-types.def. */
23417
23418 #include "i386-builtin-types.inc"
23419
23420 /* Table for the ix86 builtin non-function types. */
23421 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23422
23423 /* Retrieve an element from the above table, building some of
23424 the types lazily. */
23425
23426 static tree
23427 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23428 {
23429 unsigned int index;
23430 tree type, itype;
23431
23432 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23433
23434 type = ix86_builtin_type_tab[(int) tcode];
23435 if (type != NULL)
23436 return type;
23437
23438 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23439 if (tcode <= IX86_BT_LAST_VECT)
23440 {
23441 enum machine_mode mode;
23442
23443 index = tcode - IX86_BT_LAST_PRIM - 1;
23444 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23445 mode = ix86_builtin_type_vect_mode[index];
23446
23447 type = build_vector_type_for_mode (itype, mode);
23448 }
23449 else
23450 {
23451 int quals;
23452
23453 index = tcode - IX86_BT_LAST_VECT - 1;
23454 if (tcode <= IX86_BT_LAST_PTR)
23455 quals = TYPE_UNQUALIFIED;
23456 else
23457 quals = TYPE_QUAL_CONST;
23458
23459 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23460 if (quals != TYPE_UNQUALIFIED)
23461 itype = build_qualified_type (itype, quals);
23462
23463 type = build_pointer_type (itype);
23464 }
23465
23466 ix86_builtin_type_tab[(int) tcode] = type;
23467 return type;
23468 }
23469
23470 /* Table for the ix86 builtin function types. */
23471 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23472
23473 /* Retrieve an element from the above table, building some of
23474 the types lazily. */
23475
23476 static tree
23477 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23478 {
23479 tree type;
23480
23481 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23482
23483 type = ix86_builtin_func_type_tab[(int) tcode];
23484 if (type != NULL)
23485 return type;
23486
23487 if (tcode <= IX86_BT_LAST_FUNC)
23488 {
23489 unsigned start = ix86_builtin_func_start[(int) tcode];
23490 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23491 tree rtype, atype, args = void_list_node;
23492 unsigned i;
23493
23494 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23495 for (i = after - 1; i > start; --i)
23496 {
23497 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23498 args = tree_cons (NULL, atype, args);
23499 }
23500
23501 type = build_function_type (rtype, args);
23502 }
23503 else
23504 {
23505 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23506 enum ix86_builtin_func_type icode;
23507
23508 icode = ix86_builtin_func_alias_base[index];
23509 type = ix86_get_builtin_func_type (icode);
23510 }
23511
23512 ix86_builtin_func_type_tab[(int) tcode] = type;
23513 return type;
23514 }
23515
23516
23517 /* Codes for all the SSE/MMX builtins. */
23518 enum ix86_builtins
23519 {
23520 IX86_BUILTIN_ADDPS,
23521 IX86_BUILTIN_ADDSS,
23522 IX86_BUILTIN_DIVPS,
23523 IX86_BUILTIN_DIVSS,
23524 IX86_BUILTIN_MULPS,
23525 IX86_BUILTIN_MULSS,
23526 IX86_BUILTIN_SUBPS,
23527 IX86_BUILTIN_SUBSS,
23528
23529 IX86_BUILTIN_CMPEQPS,
23530 IX86_BUILTIN_CMPLTPS,
23531 IX86_BUILTIN_CMPLEPS,
23532 IX86_BUILTIN_CMPGTPS,
23533 IX86_BUILTIN_CMPGEPS,
23534 IX86_BUILTIN_CMPNEQPS,
23535 IX86_BUILTIN_CMPNLTPS,
23536 IX86_BUILTIN_CMPNLEPS,
23537 IX86_BUILTIN_CMPNGTPS,
23538 IX86_BUILTIN_CMPNGEPS,
23539 IX86_BUILTIN_CMPORDPS,
23540 IX86_BUILTIN_CMPUNORDPS,
23541 IX86_BUILTIN_CMPEQSS,
23542 IX86_BUILTIN_CMPLTSS,
23543 IX86_BUILTIN_CMPLESS,
23544 IX86_BUILTIN_CMPNEQSS,
23545 IX86_BUILTIN_CMPNLTSS,
23546 IX86_BUILTIN_CMPNLESS,
23547 IX86_BUILTIN_CMPNGTSS,
23548 IX86_BUILTIN_CMPNGESS,
23549 IX86_BUILTIN_CMPORDSS,
23550 IX86_BUILTIN_CMPUNORDSS,
23551
23552 IX86_BUILTIN_COMIEQSS,
23553 IX86_BUILTIN_COMILTSS,
23554 IX86_BUILTIN_COMILESS,
23555 IX86_BUILTIN_COMIGTSS,
23556 IX86_BUILTIN_COMIGESS,
23557 IX86_BUILTIN_COMINEQSS,
23558 IX86_BUILTIN_UCOMIEQSS,
23559 IX86_BUILTIN_UCOMILTSS,
23560 IX86_BUILTIN_UCOMILESS,
23561 IX86_BUILTIN_UCOMIGTSS,
23562 IX86_BUILTIN_UCOMIGESS,
23563 IX86_BUILTIN_UCOMINEQSS,
23564
23565 IX86_BUILTIN_CVTPI2PS,
23566 IX86_BUILTIN_CVTPS2PI,
23567 IX86_BUILTIN_CVTSI2SS,
23568 IX86_BUILTIN_CVTSI642SS,
23569 IX86_BUILTIN_CVTSS2SI,
23570 IX86_BUILTIN_CVTSS2SI64,
23571 IX86_BUILTIN_CVTTPS2PI,
23572 IX86_BUILTIN_CVTTSS2SI,
23573 IX86_BUILTIN_CVTTSS2SI64,
23574
23575 IX86_BUILTIN_MAXPS,
23576 IX86_BUILTIN_MAXSS,
23577 IX86_BUILTIN_MINPS,
23578 IX86_BUILTIN_MINSS,
23579
23580 IX86_BUILTIN_LOADUPS,
23581 IX86_BUILTIN_STOREUPS,
23582 IX86_BUILTIN_MOVSS,
23583
23584 IX86_BUILTIN_MOVHLPS,
23585 IX86_BUILTIN_MOVLHPS,
23586 IX86_BUILTIN_LOADHPS,
23587 IX86_BUILTIN_LOADLPS,
23588 IX86_BUILTIN_STOREHPS,
23589 IX86_BUILTIN_STORELPS,
23590
23591 IX86_BUILTIN_MASKMOVQ,
23592 IX86_BUILTIN_MOVMSKPS,
23593 IX86_BUILTIN_PMOVMSKB,
23594
23595 IX86_BUILTIN_MOVNTPS,
23596 IX86_BUILTIN_MOVNTQ,
23597
23598 IX86_BUILTIN_LOADDQU,
23599 IX86_BUILTIN_STOREDQU,
23600
23601 IX86_BUILTIN_PACKSSWB,
23602 IX86_BUILTIN_PACKSSDW,
23603 IX86_BUILTIN_PACKUSWB,
23604
23605 IX86_BUILTIN_PADDB,
23606 IX86_BUILTIN_PADDW,
23607 IX86_BUILTIN_PADDD,
23608 IX86_BUILTIN_PADDQ,
23609 IX86_BUILTIN_PADDSB,
23610 IX86_BUILTIN_PADDSW,
23611 IX86_BUILTIN_PADDUSB,
23612 IX86_BUILTIN_PADDUSW,
23613 IX86_BUILTIN_PSUBB,
23614 IX86_BUILTIN_PSUBW,
23615 IX86_BUILTIN_PSUBD,
23616 IX86_BUILTIN_PSUBQ,
23617 IX86_BUILTIN_PSUBSB,
23618 IX86_BUILTIN_PSUBSW,
23619 IX86_BUILTIN_PSUBUSB,
23620 IX86_BUILTIN_PSUBUSW,
23621
23622 IX86_BUILTIN_PAND,
23623 IX86_BUILTIN_PANDN,
23624 IX86_BUILTIN_POR,
23625 IX86_BUILTIN_PXOR,
23626
23627 IX86_BUILTIN_PAVGB,
23628 IX86_BUILTIN_PAVGW,
23629
23630 IX86_BUILTIN_PCMPEQB,
23631 IX86_BUILTIN_PCMPEQW,
23632 IX86_BUILTIN_PCMPEQD,
23633 IX86_BUILTIN_PCMPGTB,
23634 IX86_BUILTIN_PCMPGTW,
23635 IX86_BUILTIN_PCMPGTD,
23636
23637 IX86_BUILTIN_PMADDWD,
23638
23639 IX86_BUILTIN_PMAXSW,
23640 IX86_BUILTIN_PMAXUB,
23641 IX86_BUILTIN_PMINSW,
23642 IX86_BUILTIN_PMINUB,
23643
23644 IX86_BUILTIN_PMULHUW,
23645 IX86_BUILTIN_PMULHW,
23646 IX86_BUILTIN_PMULLW,
23647
23648 IX86_BUILTIN_PSADBW,
23649 IX86_BUILTIN_PSHUFW,
23650
23651 IX86_BUILTIN_PSLLW,
23652 IX86_BUILTIN_PSLLD,
23653 IX86_BUILTIN_PSLLQ,
23654 IX86_BUILTIN_PSRAW,
23655 IX86_BUILTIN_PSRAD,
23656 IX86_BUILTIN_PSRLW,
23657 IX86_BUILTIN_PSRLD,
23658 IX86_BUILTIN_PSRLQ,
23659 IX86_BUILTIN_PSLLWI,
23660 IX86_BUILTIN_PSLLDI,
23661 IX86_BUILTIN_PSLLQI,
23662 IX86_BUILTIN_PSRAWI,
23663 IX86_BUILTIN_PSRADI,
23664 IX86_BUILTIN_PSRLWI,
23665 IX86_BUILTIN_PSRLDI,
23666 IX86_BUILTIN_PSRLQI,
23667
23668 IX86_BUILTIN_PUNPCKHBW,
23669 IX86_BUILTIN_PUNPCKHWD,
23670 IX86_BUILTIN_PUNPCKHDQ,
23671 IX86_BUILTIN_PUNPCKLBW,
23672 IX86_BUILTIN_PUNPCKLWD,
23673 IX86_BUILTIN_PUNPCKLDQ,
23674
23675 IX86_BUILTIN_SHUFPS,
23676
23677 IX86_BUILTIN_RCPPS,
23678 IX86_BUILTIN_RCPSS,
23679 IX86_BUILTIN_RSQRTPS,
23680 IX86_BUILTIN_RSQRTPS_NR,
23681 IX86_BUILTIN_RSQRTSS,
23682 IX86_BUILTIN_RSQRTF,
23683 IX86_BUILTIN_SQRTPS,
23684 IX86_BUILTIN_SQRTPS_NR,
23685 IX86_BUILTIN_SQRTSS,
23686
23687 IX86_BUILTIN_UNPCKHPS,
23688 IX86_BUILTIN_UNPCKLPS,
23689
23690 IX86_BUILTIN_ANDPS,
23691 IX86_BUILTIN_ANDNPS,
23692 IX86_BUILTIN_ORPS,
23693 IX86_BUILTIN_XORPS,
23694
23695 IX86_BUILTIN_EMMS,
23696 IX86_BUILTIN_LDMXCSR,
23697 IX86_BUILTIN_STMXCSR,
23698 IX86_BUILTIN_SFENCE,
23699
23700 /* 3DNow! Original */
23701 IX86_BUILTIN_FEMMS,
23702 IX86_BUILTIN_PAVGUSB,
23703 IX86_BUILTIN_PF2ID,
23704 IX86_BUILTIN_PFACC,
23705 IX86_BUILTIN_PFADD,
23706 IX86_BUILTIN_PFCMPEQ,
23707 IX86_BUILTIN_PFCMPGE,
23708 IX86_BUILTIN_PFCMPGT,
23709 IX86_BUILTIN_PFMAX,
23710 IX86_BUILTIN_PFMIN,
23711 IX86_BUILTIN_PFMUL,
23712 IX86_BUILTIN_PFRCP,
23713 IX86_BUILTIN_PFRCPIT1,
23714 IX86_BUILTIN_PFRCPIT2,
23715 IX86_BUILTIN_PFRSQIT1,
23716 IX86_BUILTIN_PFRSQRT,
23717 IX86_BUILTIN_PFSUB,
23718 IX86_BUILTIN_PFSUBR,
23719 IX86_BUILTIN_PI2FD,
23720 IX86_BUILTIN_PMULHRW,
23721
23722 /* 3DNow! Athlon Extensions */
23723 IX86_BUILTIN_PF2IW,
23724 IX86_BUILTIN_PFNACC,
23725 IX86_BUILTIN_PFPNACC,
23726 IX86_BUILTIN_PI2FW,
23727 IX86_BUILTIN_PSWAPDSI,
23728 IX86_BUILTIN_PSWAPDSF,
23729
23730 /* SSE2 */
23731 IX86_BUILTIN_ADDPD,
23732 IX86_BUILTIN_ADDSD,
23733 IX86_BUILTIN_DIVPD,
23734 IX86_BUILTIN_DIVSD,
23735 IX86_BUILTIN_MULPD,
23736 IX86_BUILTIN_MULSD,
23737 IX86_BUILTIN_SUBPD,
23738 IX86_BUILTIN_SUBSD,
23739
23740 IX86_BUILTIN_CMPEQPD,
23741 IX86_BUILTIN_CMPLTPD,
23742 IX86_BUILTIN_CMPLEPD,
23743 IX86_BUILTIN_CMPGTPD,
23744 IX86_BUILTIN_CMPGEPD,
23745 IX86_BUILTIN_CMPNEQPD,
23746 IX86_BUILTIN_CMPNLTPD,
23747 IX86_BUILTIN_CMPNLEPD,
23748 IX86_BUILTIN_CMPNGTPD,
23749 IX86_BUILTIN_CMPNGEPD,
23750 IX86_BUILTIN_CMPORDPD,
23751 IX86_BUILTIN_CMPUNORDPD,
23752 IX86_BUILTIN_CMPEQSD,
23753 IX86_BUILTIN_CMPLTSD,
23754 IX86_BUILTIN_CMPLESD,
23755 IX86_BUILTIN_CMPNEQSD,
23756 IX86_BUILTIN_CMPNLTSD,
23757 IX86_BUILTIN_CMPNLESD,
23758 IX86_BUILTIN_CMPORDSD,
23759 IX86_BUILTIN_CMPUNORDSD,
23760
23761 IX86_BUILTIN_COMIEQSD,
23762 IX86_BUILTIN_COMILTSD,
23763 IX86_BUILTIN_COMILESD,
23764 IX86_BUILTIN_COMIGTSD,
23765 IX86_BUILTIN_COMIGESD,
23766 IX86_BUILTIN_COMINEQSD,
23767 IX86_BUILTIN_UCOMIEQSD,
23768 IX86_BUILTIN_UCOMILTSD,
23769 IX86_BUILTIN_UCOMILESD,
23770 IX86_BUILTIN_UCOMIGTSD,
23771 IX86_BUILTIN_UCOMIGESD,
23772 IX86_BUILTIN_UCOMINEQSD,
23773
23774 IX86_BUILTIN_MAXPD,
23775 IX86_BUILTIN_MAXSD,
23776 IX86_BUILTIN_MINPD,
23777 IX86_BUILTIN_MINSD,
23778
23779 IX86_BUILTIN_ANDPD,
23780 IX86_BUILTIN_ANDNPD,
23781 IX86_BUILTIN_ORPD,
23782 IX86_BUILTIN_XORPD,
23783
23784 IX86_BUILTIN_SQRTPD,
23785 IX86_BUILTIN_SQRTSD,
23786
23787 IX86_BUILTIN_UNPCKHPD,
23788 IX86_BUILTIN_UNPCKLPD,
23789
23790 IX86_BUILTIN_SHUFPD,
23791
23792 IX86_BUILTIN_LOADUPD,
23793 IX86_BUILTIN_STOREUPD,
23794 IX86_BUILTIN_MOVSD,
23795
23796 IX86_BUILTIN_LOADHPD,
23797 IX86_BUILTIN_LOADLPD,
23798
23799 IX86_BUILTIN_CVTDQ2PD,
23800 IX86_BUILTIN_CVTDQ2PS,
23801
23802 IX86_BUILTIN_CVTPD2DQ,
23803 IX86_BUILTIN_CVTPD2PI,
23804 IX86_BUILTIN_CVTPD2PS,
23805 IX86_BUILTIN_CVTTPD2DQ,
23806 IX86_BUILTIN_CVTTPD2PI,
23807
23808 IX86_BUILTIN_CVTPI2PD,
23809 IX86_BUILTIN_CVTSI2SD,
23810 IX86_BUILTIN_CVTSI642SD,
23811
23812 IX86_BUILTIN_CVTSD2SI,
23813 IX86_BUILTIN_CVTSD2SI64,
23814 IX86_BUILTIN_CVTSD2SS,
23815 IX86_BUILTIN_CVTSS2SD,
23816 IX86_BUILTIN_CVTTSD2SI,
23817 IX86_BUILTIN_CVTTSD2SI64,
23818
23819 IX86_BUILTIN_CVTPS2DQ,
23820 IX86_BUILTIN_CVTPS2PD,
23821 IX86_BUILTIN_CVTTPS2DQ,
23822
23823 IX86_BUILTIN_MOVNTI,
23824 IX86_BUILTIN_MOVNTPD,
23825 IX86_BUILTIN_MOVNTDQ,
23826
23827 IX86_BUILTIN_MOVQ128,
23828
23829 /* SSE2 MMX */
23830 IX86_BUILTIN_MASKMOVDQU,
23831 IX86_BUILTIN_MOVMSKPD,
23832 IX86_BUILTIN_PMOVMSKB128,
23833
23834 IX86_BUILTIN_PACKSSWB128,
23835 IX86_BUILTIN_PACKSSDW128,
23836 IX86_BUILTIN_PACKUSWB128,
23837
23838 IX86_BUILTIN_PADDB128,
23839 IX86_BUILTIN_PADDW128,
23840 IX86_BUILTIN_PADDD128,
23841 IX86_BUILTIN_PADDQ128,
23842 IX86_BUILTIN_PADDSB128,
23843 IX86_BUILTIN_PADDSW128,
23844 IX86_BUILTIN_PADDUSB128,
23845 IX86_BUILTIN_PADDUSW128,
23846 IX86_BUILTIN_PSUBB128,
23847 IX86_BUILTIN_PSUBW128,
23848 IX86_BUILTIN_PSUBD128,
23849 IX86_BUILTIN_PSUBQ128,
23850 IX86_BUILTIN_PSUBSB128,
23851 IX86_BUILTIN_PSUBSW128,
23852 IX86_BUILTIN_PSUBUSB128,
23853 IX86_BUILTIN_PSUBUSW128,
23854
23855 IX86_BUILTIN_PAND128,
23856 IX86_BUILTIN_PANDN128,
23857 IX86_BUILTIN_POR128,
23858 IX86_BUILTIN_PXOR128,
23859
23860 IX86_BUILTIN_PAVGB128,
23861 IX86_BUILTIN_PAVGW128,
23862
23863 IX86_BUILTIN_PCMPEQB128,
23864 IX86_BUILTIN_PCMPEQW128,
23865 IX86_BUILTIN_PCMPEQD128,
23866 IX86_BUILTIN_PCMPGTB128,
23867 IX86_BUILTIN_PCMPGTW128,
23868 IX86_BUILTIN_PCMPGTD128,
23869
23870 IX86_BUILTIN_PMADDWD128,
23871
23872 IX86_BUILTIN_PMAXSW128,
23873 IX86_BUILTIN_PMAXUB128,
23874 IX86_BUILTIN_PMINSW128,
23875 IX86_BUILTIN_PMINUB128,
23876
23877 IX86_BUILTIN_PMULUDQ,
23878 IX86_BUILTIN_PMULUDQ128,
23879 IX86_BUILTIN_PMULHUW128,
23880 IX86_BUILTIN_PMULHW128,
23881 IX86_BUILTIN_PMULLW128,
23882
23883 IX86_BUILTIN_PSADBW128,
23884 IX86_BUILTIN_PSHUFHW,
23885 IX86_BUILTIN_PSHUFLW,
23886 IX86_BUILTIN_PSHUFD,
23887
23888 IX86_BUILTIN_PSLLDQI128,
23889 IX86_BUILTIN_PSLLWI128,
23890 IX86_BUILTIN_PSLLDI128,
23891 IX86_BUILTIN_PSLLQI128,
23892 IX86_BUILTIN_PSRAWI128,
23893 IX86_BUILTIN_PSRADI128,
23894 IX86_BUILTIN_PSRLDQI128,
23895 IX86_BUILTIN_PSRLWI128,
23896 IX86_BUILTIN_PSRLDI128,
23897 IX86_BUILTIN_PSRLQI128,
23898
23899 IX86_BUILTIN_PSLLDQ128,
23900 IX86_BUILTIN_PSLLW128,
23901 IX86_BUILTIN_PSLLD128,
23902 IX86_BUILTIN_PSLLQ128,
23903 IX86_BUILTIN_PSRAW128,
23904 IX86_BUILTIN_PSRAD128,
23905 IX86_BUILTIN_PSRLW128,
23906 IX86_BUILTIN_PSRLD128,
23907 IX86_BUILTIN_PSRLQ128,
23908
23909 IX86_BUILTIN_PUNPCKHBW128,
23910 IX86_BUILTIN_PUNPCKHWD128,
23911 IX86_BUILTIN_PUNPCKHDQ128,
23912 IX86_BUILTIN_PUNPCKHQDQ128,
23913 IX86_BUILTIN_PUNPCKLBW128,
23914 IX86_BUILTIN_PUNPCKLWD128,
23915 IX86_BUILTIN_PUNPCKLDQ128,
23916 IX86_BUILTIN_PUNPCKLQDQ128,
23917
23918 IX86_BUILTIN_CLFLUSH,
23919 IX86_BUILTIN_MFENCE,
23920 IX86_BUILTIN_LFENCE,
23921
23922 IX86_BUILTIN_BSRSI,
23923 IX86_BUILTIN_BSRDI,
23924 IX86_BUILTIN_RDPMC,
23925 IX86_BUILTIN_RDTSC,
23926 IX86_BUILTIN_RDTSCP,
23927 IX86_BUILTIN_ROLQI,
23928 IX86_BUILTIN_ROLHI,
23929 IX86_BUILTIN_RORQI,
23930 IX86_BUILTIN_RORHI,
23931
23932 /* SSE3. */
23933 IX86_BUILTIN_ADDSUBPS,
23934 IX86_BUILTIN_HADDPS,
23935 IX86_BUILTIN_HSUBPS,
23936 IX86_BUILTIN_MOVSHDUP,
23937 IX86_BUILTIN_MOVSLDUP,
23938 IX86_BUILTIN_ADDSUBPD,
23939 IX86_BUILTIN_HADDPD,
23940 IX86_BUILTIN_HSUBPD,
23941 IX86_BUILTIN_LDDQU,
23942
23943 IX86_BUILTIN_MONITOR,
23944 IX86_BUILTIN_MWAIT,
23945
23946 /* SSSE3. */
23947 IX86_BUILTIN_PHADDW,
23948 IX86_BUILTIN_PHADDD,
23949 IX86_BUILTIN_PHADDSW,
23950 IX86_BUILTIN_PHSUBW,
23951 IX86_BUILTIN_PHSUBD,
23952 IX86_BUILTIN_PHSUBSW,
23953 IX86_BUILTIN_PMADDUBSW,
23954 IX86_BUILTIN_PMULHRSW,
23955 IX86_BUILTIN_PSHUFB,
23956 IX86_BUILTIN_PSIGNB,
23957 IX86_BUILTIN_PSIGNW,
23958 IX86_BUILTIN_PSIGND,
23959 IX86_BUILTIN_PALIGNR,
23960 IX86_BUILTIN_PABSB,
23961 IX86_BUILTIN_PABSW,
23962 IX86_BUILTIN_PABSD,
23963
23964 IX86_BUILTIN_PHADDW128,
23965 IX86_BUILTIN_PHADDD128,
23966 IX86_BUILTIN_PHADDSW128,
23967 IX86_BUILTIN_PHSUBW128,
23968 IX86_BUILTIN_PHSUBD128,
23969 IX86_BUILTIN_PHSUBSW128,
23970 IX86_BUILTIN_PMADDUBSW128,
23971 IX86_BUILTIN_PMULHRSW128,
23972 IX86_BUILTIN_PSHUFB128,
23973 IX86_BUILTIN_PSIGNB128,
23974 IX86_BUILTIN_PSIGNW128,
23975 IX86_BUILTIN_PSIGND128,
23976 IX86_BUILTIN_PALIGNR128,
23977 IX86_BUILTIN_PABSB128,
23978 IX86_BUILTIN_PABSW128,
23979 IX86_BUILTIN_PABSD128,
23980
23981 /* AMDFAM10 - SSE4A New Instructions. */
23982 IX86_BUILTIN_MOVNTSD,
23983 IX86_BUILTIN_MOVNTSS,
23984 IX86_BUILTIN_EXTRQI,
23985 IX86_BUILTIN_EXTRQ,
23986 IX86_BUILTIN_INSERTQI,
23987 IX86_BUILTIN_INSERTQ,
23988
23989 /* SSE4.1. */
23990 IX86_BUILTIN_BLENDPD,
23991 IX86_BUILTIN_BLENDPS,
23992 IX86_BUILTIN_BLENDVPD,
23993 IX86_BUILTIN_BLENDVPS,
23994 IX86_BUILTIN_PBLENDVB128,
23995 IX86_BUILTIN_PBLENDW128,
23996
23997 IX86_BUILTIN_DPPD,
23998 IX86_BUILTIN_DPPS,
23999
24000 IX86_BUILTIN_INSERTPS128,
24001
24002 IX86_BUILTIN_MOVNTDQA,
24003 IX86_BUILTIN_MPSADBW128,
24004 IX86_BUILTIN_PACKUSDW128,
24005 IX86_BUILTIN_PCMPEQQ,
24006 IX86_BUILTIN_PHMINPOSUW128,
24007
24008 IX86_BUILTIN_PMAXSB128,
24009 IX86_BUILTIN_PMAXSD128,
24010 IX86_BUILTIN_PMAXUD128,
24011 IX86_BUILTIN_PMAXUW128,
24012
24013 IX86_BUILTIN_PMINSB128,
24014 IX86_BUILTIN_PMINSD128,
24015 IX86_BUILTIN_PMINUD128,
24016 IX86_BUILTIN_PMINUW128,
24017
24018 IX86_BUILTIN_PMOVSXBW128,
24019 IX86_BUILTIN_PMOVSXBD128,
24020 IX86_BUILTIN_PMOVSXBQ128,
24021 IX86_BUILTIN_PMOVSXWD128,
24022 IX86_BUILTIN_PMOVSXWQ128,
24023 IX86_BUILTIN_PMOVSXDQ128,
24024
24025 IX86_BUILTIN_PMOVZXBW128,
24026 IX86_BUILTIN_PMOVZXBD128,
24027 IX86_BUILTIN_PMOVZXBQ128,
24028 IX86_BUILTIN_PMOVZXWD128,
24029 IX86_BUILTIN_PMOVZXWQ128,
24030 IX86_BUILTIN_PMOVZXDQ128,
24031
24032 IX86_BUILTIN_PMULDQ128,
24033 IX86_BUILTIN_PMULLD128,
24034
24035 IX86_BUILTIN_ROUNDPD,
24036 IX86_BUILTIN_ROUNDPS,
24037 IX86_BUILTIN_ROUNDSD,
24038 IX86_BUILTIN_ROUNDSS,
24039
24040 IX86_BUILTIN_FLOORPD,
24041 IX86_BUILTIN_CEILPD,
24042 IX86_BUILTIN_TRUNCPD,
24043 IX86_BUILTIN_RINTPD,
24044 IX86_BUILTIN_FLOORPS,
24045 IX86_BUILTIN_CEILPS,
24046 IX86_BUILTIN_TRUNCPS,
24047 IX86_BUILTIN_RINTPS,
24048
24049 IX86_BUILTIN_PTESTZ,
24050 IX86_BUILTIN_PTESTC,
24051 IX86_BUILTIN_PTESTNZC,
24052
24053 IX86_BUILTIN_VEC_INIT_V2SI,
24054 IX86_BUILTIN_VEC_INIT_V4HI,
24055 IX86_BUILTIN_VEC_INIT_V8QI,
24056 IX86_BUILTIN_VEC_EXT_V2DF,
24057 IX86_BUILTIN_VEC_EXT_V2DI,
24058 IX86_BUILTIN_VEC_EXT_V4SF,
24059 IX86_BUILTIN_VEC_EXT_V4SI,
24060 IX86_BUILTIN_VEC_EXT_V8HI,
24061 IX86_BUILTIN_VEC_EXT_V2SI,
24062 IX86_BUILTIN_VEC_EXT_V4HI,
24063 IX86_BUILTIN_VEC_EXT_V16QI,
24064 IX86_BUILTIN_VEC_SET_V2DI,
24065 IX86_BUILTIN_VEC_SET_V4SF,
24066 IX86_BUILTIN_VEC_SET_V4SI,
24067 IX86_BUILTIN_VEC_SET_V8HI,
24068 IX86_BUILTIN_VEC_SET_V4HI,
24069 IX86_BUILTIN_VEC_SET_V16QI,
24070
24071 IX86_BUILTIN_VEC_PACK_SFIX,
24072
24073 /* SSE4.2. */
24074 IX86_BUILTIN_CRC32QI,
24075 IX86_BUILTIN_CRC32HI,
24076 IX86_BUILTIN_CRC32SI,
24077 IX86_BUILTIN_CRC32DI,
24078
24079 IX86_BUILTIN_PCMPESTRI128,
24080 IX86_BUILTIN_PCMPESTRM128,
24081 IX86_BUILTIN_PCMPESTRA128,
24082 IX86_BUILTIN_PCMPESTRC128,
24083 IX86_BUILTIN_PCMPESTRO128,
24084 IX86_BUILTIN_PCMPESTRS128,
24085 IX86_BUILTIN_PCMPESTRZ128,
24086 IX86_BUILTIN_PCMPISTRI128,
24087 IX86_BUILTIN_PCMPISTRM128,
24088 IX86_BUILTIN_PCMPISTRA128,
24089 IX86_BUILTIN_PCMPISTRC128,
24090 IX86_BUILTIN_PCMPISTRO128,
24091 IX86_BUILTIN_PCMPISTRS128,
24092 IX86_BUILTIN_PCMPISTRZ128,
24093
24094 IX86_BUILTIN_PCMPGTQ,
24095
24096 /* AES instructions */
24097 IX86_BUILTIN_AESENC128,
24098 IX86_BUILTIN_AESENCLAST128,
24099 IX86_BUILTIN_AESDEC128,
24100 IX86_BUILTIN_AESDECLAST128,
24101 IX86_BUILTIN_AESIMC128,
24102 IX86_BUILTIN_AESKEYGENASSIST128,
24103
24104 /* PCLMUL instruction */
24105 IX86_BUILTIN_PCLMULQDQ128,
24106
24107 /* AVX */
24108 IX86_BUILTIN_ADDPD256,
24109 IX86_BUILTIN_ADDPS256,
24110 IX86_BUILTIN_ADDSUBPD256,
24111 IX86_BUILTIN_ADDSUBPS256,
24112 IX86_BUILTIN_ANDPD256,
24113 IX86_BUILTIN_ANDPS256,
24114 IX86_BUILTIN_ANDNPD256,
24115 IX86_BUILTIN_ANDNPS256,
24116 IX86_BUILTIN_BLENDPD256,
24117 IX86_BUILTIN_BLENDPS256,
24118 IX86_BUILTIN_BLENDVPD256,
24119 IX86_BUILTIN_BLENDVPS256,
24120 IX86_BUILTIN_DIVPD256,
24121 IX86_BUILTIN_DIVPS256,
24122 IX86_BUILTIN_DPPS256,
24123 IX86_BUILTIN_HADDPD256,
24124 IX86_BUILTIN_HADDPS256,
24125 IX86_BUILTIN_HSUBPD256,
24126 IX86_BUILTIN_HSUBPS256,
24127 IX86_BUILTIN_MAXPD256,
24128 IX86_BUILTIN_MAXPS256,
24129 IX86_BUILTIN_MINPD256,
24130 IX86_BUILTIN_MINPS256,
24131 IX86_BUILTIN_MULPD256,
24132 IX86_BUILTIN_MULPS256,
24133 IX86_BUILTIN_ORPD256,
24134 IX86_BUILTIN_ORPS256,
24135 IX86_BUILTIN_SHUFPD256,
24136 IX86_BUILTIN_SHUFPS256,
24137 IX86_BUILTIN_SUBPD256,
24138 IX86_BUILTIN_SUBPS256,
24139 IX86_BUILTIN_XORPD256,
24140 IX86_BUILTIN_XORPS256,
24141 IX86_BUILTIN_CMPSD,
24142 IX86_BUILTIN_CMPSS,
24143 IX86_BUILTIN_CMPPD,
24144 IX86_BUILTIN_CMPPS,
24145 IX86_BUILTIN_CMPPD256,
24146 IX86_BUILTIN_CMPPS256,
24147 IX86_BUILTIN_CVTDQ2PD256,
24148 IX86_BUILTIN_CVTDQ2PS256,
24149 IX86_BUILTIN_CVTPD2PS256,
24150 IX86_BUILTIN_CVTPS2DQ256,
24151 IX86_BUILTIN_CVTPS2PD256,
24152 IX86_BUILTIN_CVTTPD2DQ256,
24153 IX86_BUILTIN_CVTPD2DQ256,
24154 IX86_BUILTIN_CVTTPS2DQ256,
24155 IX86_BUILTIN_EXTRACTF128PD256,
24156 IX86_BUILTIN_EXTRACTF128PS256,
24157 IX86_BUILTIN_EXTRACTF128SI256,
24158 IX86_BUILTIN_VZEROALL,
24159 IX86_BUILTIN_VZEROUPPER,
24160 IX86_BUILTIN_VPERMILVARPD,
24161 IX86_BUILTIN_VPERMILVARPS,
24162 IX86_BUILTIN_VPERMILVARPD256,
24163 IX86_BUILTIN_VPERMILVARPS256,
24164 IX86_BUILTIN_VPERMILPD,
24165 IX86_BUILTIN_VPERMILPS,
24166 IX86_BUILTIN_VPERMILPD256,
24167 IX86_BUILTIN_VPERMILPS256,
24168 IX86_BUILTIN_VPERMIL2PD,
24169 IX86_BUILTIN_VPERMIL2PS,
24170 IX86_BUILTIN_VPERMIL2PD256,
24171 IX86_BUILTIN_VPERMIL2PS256,
24172 IX86_BUILTIN_VPERM2F128PD256,
24173 IX86_BUILTIN_VPERM2F128PS256,
24174 IX86_BUILTIN_VPERM2F128SI256,
24175 IX86_BUILTIN_VBROADCASTSS,
24176 IX86_BUILTIN_VBROADCASTSD256,
24177 IX86_BUILTIN_VBROADCASTSS256,
24178 IX86_BUILTIN_VBROADCASTPD256,
24179 IX86_BUILTIN_VBROADCASTPS256,
24180 IX86_BUILTIN_VINSERTF128PD256,
24181 IX86_BUILTIN_VINSERTF128PS256,
24182 IX86_BUILTIN_VINSERTF128SI256,
24183 IX86_BUILTIN_LOADUPD256,
24184 IX86_BUILTIN_LOADUPS256,
24185 IX86_BUILTIN_STOREUPD256,
24186 IX86_BUILTIN_STOREUPS256,
24187 IX86_BUILTIN_LDDQU256,
24188 IX86_BUILTIN_MOVNTDQ256,
24189 IX86_BUILTIN_MOVNTPD256,
24190 IX86_BUILTIN_MOVNTPS256,
24191 IX86_BUILTIN_LOADDQU256,
24192 IX86_BUILTIN_STOREDQU256,
24193 IX86_BUILTIN_MASKLOADPD,
24194 IX86_BUILTIN_MASKLOADPS,
24195 IX86_BUILTIN_MASKSTOREPD,
24196 IX86_BUILTIN_MASKSTOREPS,
24197 IX86_BUILTIN_MASKLOADPD256,
24198 IX86_BUILTIN_MASKLOADPS256,
24199 IX86_BUILTIN_MASKSTOREPD256,
24200 IX86_BUILTIN_MASKSTOREPS256,
24201 IX86_BUILTIN_MOVSHDUP256,
24202 IX86_BUILTIN_MOVSLDUP256,
24203 IX86_BUILTIN_MOVDDUP256,
24204
24205 IX86_BUILTIN_SQRTPD256,
24206 IX86_BUILTIN_SQRTPS256,
24207 IX86_BUILTIN_SQRTPS_NR256,
24208 IX86_BUILTIN_RSQRTPS256,
24209 IX86_BUILTIN_RSQRTPS_NR256,
24210
24211 IX86_BUILTIN_RCPPS256,
24212
24213 IX86_BUILTIN_ROUNDPD256,
24214 IX86_BUILTIN_ROUNDPS256,
24215
24216 IX86_BUILTIN_FLOORPD256,
24217 IX86_BUILTIN_CEILPD256,
24218 IX86_BUILTIN_TRUNCPD256,
24219 IX86_BUILTIN_RINTPD256,
24220 IX86_BUILTIN_FLOORPS256,
24221 IX86_BUILTIN_CEILPS256,
24222 IX86_BUILTIN_TRUNCPS256,
24223 IX86_BUILTIN_RINTPS256,
24224
24225 IX86_BUILTIN_UNPCKHPD256,
24226 IX86_BUILTIN_UNPCKLPD256,
24227 IX86_BUILTIN_UNPCKHPS256,
24228 IX86_BUILTIN_UNPCKLPS256,
24229
24230 IX86_BUILTIN_SI256_SI,
24231 IX86_BUILTIN_PS256_PS,
24232 IX86_BUILTIN_PD256_PD,
24233 IX86_BUILTIN_SI_SI256,
24234 IX86_BUILTIN_PS_PS256,
24235 IX86_BUILTIN_PD_PD256,
24236
24237 IX86_BUILTIN_VTESTZPD,
24238 IX86_BUILTIN_VTESTCPD,
24239 IX86_BUILTIN_VTESTNZCPD,
24240 IX86_BUILTIN_VTESTZPS,
24241 IX86_BUILTIN_VTESTCPS,
24242 IX86_BUILTIN_VTESTNZCPS,
24243 IX86_BUILTIN_VTESTZPD256,
24244 IX86_BUILTIN_VTESTCPD256,
24245 IX86_BUILTIN_VTESTNZCPD256,
24246 IX86_BUILTIN_VTESTZPS256,
24247 IX86_BUILTIN_VTESTCPS256,
24248 IX86_BUILTIN_VTESTNZCPS256,
24249 IX86_BUILTIN_PTESTZ256,
24250 IX86_BUILTIN_PTESTC256,
24251 IX86_BUILTIN_PTESTNZC256,
24252
24253 IX86_BUILTIN_MOVMSKPD256,
24254 IX86_BUILTIN_MOVMSKPS256,
24255
24256 /* TFmode support builtins. */
24257 IX86_BUILTIN_INFQ,
24258 IX86_BUILTIN_HUGE_VALQ,
24259 IX86_BUILTIN_FABSQ,
24260 IX86_BUILTIN_COPYSIGNQ,
24261
24262 /* Vectorizer support builtins. */
24263 IX86_BUILTIN_CPYSGNPS,
24264 IX86_BUILTIN_CPYSGNPD,
24265 IX86_BUILTIN_CPYSGNPS256,
24266 IX86_BUILTIN_CPYSGNPD256,
24267
24268 IX86_BUILTIN_CVTUDQ2PS,
24269
24270 IX86_BUILTIN_VEC_PERM_V2DF,
24271 IX86_BUILTIN_VEC_PERM_V4SF,
24272 IX86_BUILTIN_VEC_PERM_V2DI,
24273 IX86_BUILTIN_VEC_PERM_V4SI,
24274 IX86_BUILTIN_VEC_PERM_V8HI,
24275 IX86_BUILTIN_VEC_PERM_V16QI,
24276 IX86_BUILTIN_VEC_PERM_V2DI_U,
24277 IX86_BUILTIN_VEC_PERM_V4SI_U,
24278 IX86_BUILTIN_VEC_PERM_V8HI_U,
24279 IX86_BUILTIN_VEC_PERM_V16QI_U,
24280 IX86_BUILTIN_VEC_PERM_V4DF,
24281 IX86_BUILTIN_VEC_PERM_V8SF,
24282
24283 /* FMA4 and XOP instructions. */
24284 IX86_BUILTIN_VFMADDSS,
24285 IX86_BUILTIN_VFMADDSD,
24286 IX86_BUILTIN_VFMADDPS,
24287 IX86_BUILTIN_VFMADDPD,
24288 IX86_BUILTIN_VFMADDPS256,
24289 IX86_BUILTIN_VFMADDPD256,
24290 IX86_BUILTIN_VFMADDSUBPS,
24291 IX86_BUILTIN_VFMADDSUBPD,
24292 IX86_BUILTIN_VFMADDSUBPS256,
24293 IX86_BUILTIN_VFMADDSUBPD256,
24294
24295 IX86_BUILTIN_VPCMOV,
24296 IX86_BUILTIN_VPCMOV_V2DI,
24297 IX86_BUILTIN_VPCMOV_V4SI,
24298 IX86_BUILTIN_VPCMOV_V8HI,
24299 IX86_BUILTIN_VPCMOV_V16QI,
24300 IX86_BUILTIN_VPCMOV_V4SF,
24301 IX86_BUILTIN_VPCMOV_V2DF,
24302 IX86_BUILTIN_VPCMOV256,
24303 IX86_BUILTIN_VPCMOV_V4DI256,
24304 IX86_BUILTIN_VPCMOV_V8SI256,
24305 IX86_BUILTIN_VPCMOV_V16HI256,
24306 IX86_BUILTIN_VPCMOV_V32QI256,
24307 IX86_BUILTIN_VPCMOV_V8SF256,
24308 IX86_BUILTIN_VPCMOV_V4DF256,
24309
24310 IX86_BUILTIN_VPPERM,
24311
24312 IX86_BUILTIN_VPMACSSWW,
24313 IX86_BUILTIN_VPMACSWW,
24314 IX86_BUILTIN_VPMACSSWD,
24315 IX86_BUILTIN_VPMACSWD,
24316 IX86_BUILTIN_VPMACSSDD,
24317 IX86_BUILTIN_VPMACSDD,
24318 IX86_BUILTIN_VPMACSSDQL,
24319 IX86_BUILTIN_VPMACSSDQH,
24320 IX86_BUILTIN_VPMACSDQL,
24321 IX86_BUILTIN_VPMACSDQH,
24322 IX86_BUILTIN_VPMADCSSWD,
24323 IX86_BUILTIN_VPMADCSWD,
24324
24325 IX86_BUILTIN_VPHADDBW,
24326 IX86_BUILTIN_VPHADDBD,
24327 IX86_BUILTIN_VPHADDBQ,
24328 IX86_BUILTIN_VPHADDWD,
24329 IX86_BUILTIN_VPHADDWQ,
24330 IX86_BUILTIN_VPHADDDQ,
24331 IX86_BUILTIN_VPHADDUBW,
24332 IX86_BUILTIN_VPHADDUBD,
24333 IX86_BUILTIN_VPHADDUBQ,
24334 IX86_BUILTIN_VPHADDUWD,
24335 IX86_BUILTIN_VPHADDUWQ,
24336 IX86_BUILTIN_VPHADDUDQ,
24337 IX86_BUILTIN_VPHSUBBW,
24338 IX86_BUILTIN_VPHSUBWD,
24339 IX86_BUILTIN_VPHSUBDQ,
24340
24341 IX86_BUILTIN_VPROTB,
24342 IX86_BUILTIN_VPROTW,
24343 IX86_BUILTIN_VPROTD,
24344 IX86_BUILTIN_VPROTQ,
24345 IX86_BUILTIN_VPROTB_IMM,
24346 IX86_BUILTIN_VPROTW_IMM,
24347 IX86_BUILTIN_VPROTD_IMM,
24348 IX86_BUILTIN_VPROTQ_IMM,
24349
24350 IX86_BUILTIN_VPSHLB,
24351 IX86_BUILTIN_VPSHLW,
24352 IX86_BUILTIN_VPSHLD,
24353 IX86_BUILTIN_VPSHLQ,
24354 IX86_BUILTIN_VPSHAB,
24355 IX86_BUILTIN_VPSHAW,
24356 IX86_BUILTIN_VPSHAD,
24357 IX86_BUILTIN_VPSHAQ,
24358
24359 IX86_BUILTIN_VFRCZSS,
24360 IX86_BUILTIN_VFRCZSD,
24361 IX86_BUILTIN_VFRCZPS,
24362 IX86_BUILTIN_VFRCZPD,
24363 IX86_BUILTIN_VFRCZPS256,
24364 IX86_BUILTIN_VFRCZPD256,
24365
24366 IX86_BUILTIN_VPCOMEQUB,
24367 IX86_BUILTIN_VPCOMNEUB,
24368 IX86_BUILTIN_VPCOMLTUB,
24369 IX86_BUILTIN_VPCOMLEUB,
24370 IX86_BUILTIN_VPCOMGTUB,
24371 IX86_BUILTIN_VPCOMGEUB,
24372 IX86_BUILTIN_VPCOMFALSEUB,
24373 IX86_BUILTIN_VPCOMTRUEUB,
24374
24375 IX86_BUILTIN_VPCOMEQUW,
24376 IX86_BUILTIN_VPCOMNEUW,
24377 IX86_BUILTIN_VPCOMLTUW,
24378 IX86_BUILTIN_VPCOMLEUW,
24379 IX86_BUILTIN_VPCOMGTUW,
24380 IX86_BUILTIN_VPCOMGEUW,
24381 IX86_BUILTIN_VPCOMFALSEUW,
24382 IX86_BUILTIN_VPCOMTRUEUW,
24383
24384 IX86_BUILTIN_VPCOMEQUD,
24385 IX86_BUILTIN_VPCOMNEUD,
24386 IX86_BUILTIN_VPCOMLTUD,
24387 IX86_BUILTIN_VPCOMLEUD,
24388 IX86_BUILTIN_VPCOMGTUD,
24389 IX86_BUILTIN_VPCOMGEUD,
24390 IX86_BUILTIN_VPCOMFALSEUD,
24391 IX86_BUILTIN_VPCOMTRUEUD,
24392
24393 IX86_BUILTIN_VPCOMEQUQ,
24394 IX86_BUILTIN_VPCOMNEUQ,
24395 IX86_BUILTIN_VPCOMLTUQ,
24396 IX86_BUILTIN_VPCOMLEUQ,
24397 IX86_BUILTIN_VPCOMGTUQ,
24398 IX86_BUILTIN_VPCOMGEUQ,
24399 IX86_BUILTIN_VPCOMFALSEUQ,
24400 IX86_BUILTIN_VPCOMTRUEUQ,
24401
24402 IX86_BUILTIN_VPCOMEQB,
24403 IX86_BUILTIN_VPCOMNEB,
24404 IX86_BUILTIN_VPCOMLTB,
24405 IX86_BUILTIN_VPCOMLEB,
24406 IX86_BUILTIN_VPCOMGTB,
24407 IX86_BUILTIN_VPCOMGEB,
24408 IX86_BUILTIN_VPCOMFALSEB,
24409 IX86_BUILTIN_VPCOMTRUEB,
24410
24411 IX86_BUILTIN_VPCOMEQW,
24412 IX86_BUILTIN_VPCOMNEW,
24413 IX86_BUILTIN_VPCOMLTW,
24414 IX86_BUILTIN_VPCOMLEW,
24415 IX86_BUILTIN_VPCOMGTW,
24416 IX86_BUILTIN_VPCOMGEW,
24417 IX86_BUILTIN_VPCOMFALSEW,
24418 IX86_BUILTIN_VPCOMTRUEW,
24419
24420 IX86_BUILTIN_VPCOMEQD,
24421 IX86_BUILTIN_VPCOMNED,
24422 IX86_BUILTIN_VPCOMLTD,
24423 IX86_BUILTIN_VPCOMLED,
24424 IX86_BUILTIN_VPCOMGTD,
24425 IX86_BUILTIN_VPCOMGED,
24426 IX86_BUILTIN_VPCOMFALSED,
24427 IX86_BUILTIN_VPCOMTRUED,
24428
24429 IX86_BUILTIN_VPCOMEQQ,
24430 IX86_BUILTIN_VPCOMNEQ,
24431 IX86_BUILTIN_VPCOMLTQ,
24432 IX86_BUILTIN_VPCOMLEQ,
24433 IX86_BUILTIN_VPCOMGTQ,
24434 IX86_BUILTIN_VPCOMGEQ,
24435 IX86_BUILTIN_VPCOMFALSEQ,
24436 IX86_BUILTIN_VPCOMTRUEQ,
24437
24438 /* LWP instructions. */
24439 IX86_BUILTIN_LLWPCB,
24440 IX86_BUILTIN_SLWPCB,
24441 IX86_BUILTIN_LWPVAL32,
24442 IX86_BUILTIN_LWPVAL64,
24443 IX86_BUILTIN_LWPINS32,
24444 IX86_BUILTIN_LWPINS64,
24445
24446 IX86_BUILTIN_CLZS,
24447
24448 /* BMI instructions. */
24449 IX86_BUILTIN_BEXTR32,
24450 IX86_BUILTIN_BEXTR64,
24451 IX86_BUILTIN_CTZS,
24452
24453 /* TBM instructions. */
24454 IX86_BUILTIN_BEXTRI32,
24455 IX86_BUILTIN_BEXTRI64,
24456
24457
24458 /* FSGSBASE instructions. */
24459 IX86_BUILTIN_RDFSBASE32,
24460 IX86_BUILTIN_RDFSBASE64,
24461 IX86_BUILTIN_RDGSBASE32,
24462 IX86_BUILTIN_RDGSBASE64,
24463 IX86_BUILTIN_WRFSBASE32,
24464 IX86_BUILTIN_WRFSBASE64,
24465 IX86_BUILTIN_WRGSBASE32,
24466 IX86_BUILTIN_WRGSBASE64,
24467
24468 /* RDRND instructions. */
24469 IX86_BUILTIN_RDRAND16_STEP,
24470 IX86_BUILTIN_RDRAND32_STEP,
24471 IX86_BUILTIN_RDRAND64_STEP,
24472
24473 /* F16C instructions. */
24474 IX86_BUILTIN_CVTPH2PS,
24475 IX86_BUILTIN_CVTPH2PS256,
24476 IX86_BUILTIN_CVTPS2PH,
24477 IX86_BUILTIN_CVTPS2PH256,
24478
24479 /* CFString built-in for darwin */
24480 IX86_BUILTIN_CFSTRING,
24481
24482 IX86_BUILTIN_MAX
24483 };
24484
24485 /* Table for the ix86 builtin decls. */
24486 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
24487
24488 /* Table of all of the builtin functions that are possible with different
24489    ISAs, but which are not built until a function is declared that uses
24490    that ISA.  */
24491 struct builtin_isa {
24492 const char *name; /* function name */
24493 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
24494 int isa; /* isa_flags this builtin is defined for */
24495 bool const_p; /* true if the declaration is constant */
24496   bool set_and_not_built_p;	/* true if the builtin is recorded here but its decl has not been built yet */
24497 };
24498
24499 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
24500
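/* A minimal sketch of the relationship between the two tables (the helper
   below is hypothetical and shown only for illustration; GCC defines no
   such function): when def_builtin below defers a builtin, the entry in
   ix86_builtins[CODE] stays NULL_TREE and ix86_builtins_isa[CODE] records
   how to build the decl later.

     static bool
     ix86_builtin_is_deferred (enum ix86_builtins code)
     {
       return (ix86_builtins[(int) code] == NULL_TREE
	       && ix86_builtins_isa[(int) code].set_and_not_built_p);
     }
*/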
24501
24502 /* Add an ix86 target builtin function with CODE, NAME and TCODE.  Save
24503    MASK, the isa_flags this builtin requires, in the ix86_builtins_isa
24504    array.  Store the function decl in the ix86_builtins array.  Return the
24505    function decl, or NULL_TREE if the builtin was not added.
24506 
24507    If the front end has a special hook for builtin functions, delay adding
24508    builtin functions that aren't in the current ISA until the ISA is changed
24509    with function specific optimization.  Doing so can save about 300K for the
24510    default compiler.  When the builtin is expanded, check at that time whether
24511    it is valid.
24512 
24513    If the front end doesn't have a special hook, record all builtins, even
24514    those that aren't in the current ISA, in case the user uses function
24515    specific options for a different ISA; that way we don't get scope errors
24516    if a builtin is added in the middle of a function scope.  */
24517
24518 static inline tree
24519 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
24520 enum ix86_builtins code)
24521 {
24522 tree decl = NULL_TREE;
24523
24524 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
24525 {
24526 ix86_builtins_isa[(int) code].isa = mask;
24527
24528 mask &= ~OPTION_MASK_ISA_64BIT;
24529 if (mask == 0
24530 || (mask & ix86_isa_flags) != 0
24531 || (lang_hooks.builtin_function
24532 == lang_hooks.builtin_function_ext_scope))
24533
24535 	{
24536 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24537 NULL, NULL_TREE);
24538 ix86_builtins[(int) code] = decl;
24539 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24540 }
24541 else
24542 {
24543 ix86_builtins[(int) code] = NULL_TREE;
24544 ix86_builtins_isa[(int) code].tcode = tcode;
24545 ix86_builtins_isa[(int) code].name = name;
24546 ix86_builtins_isa[(int) code].const_p = false;
24547 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
24548 }
24549 }
24550
24551 return decl;
24552 }
24553
24554 /* Like def_builtin, but also marks the function decl "const". */
24555
24556 static inline tree
24557 def_builtin_const (int mask, const char *name,
24558 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
24559 {
24560 tree decl = def_builtin (mask, name, tcode, code);
24561 if (decl)
24562 TREE_READONLY (decl) = 1;
24563 else
24564 ix86_builtins_isa[(int) code].const_p = true;
24565
24566 return decl;
24567 }
24568
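/* As an illustration of the interface above (a hypothetical call; the
   actual registrations in this file are driven by the bdesc_* tables
   below), a const builtin could be added with

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
			V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);

   If MASK is nonzero, not enabled in ix86_isa_flags, and the front end's
   builtin_function hook differs from its builtin_function_ext_scope hook,
   the decl is not built immediately; the entry is only recorded in
   ix86_builtins_isa and is built later by ix86_add_new_builtins.  */
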
24569 /* Add any new builtin functions for a given ISA that have not yet been
24570    declared.  This saves a bit of space compared to adding all of the
24571    declarations to the tree up front, whether or not they are used.  */
24572
24573 static void
24574 ix86_add_new_builtins (int isa)
24575 {
24576 int i;
24577
24578   for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
24579 {
24580 if ((ix86_builtins_isa[i].isa & isa) != 0
24581 && ix86_builtins_isa[i].set_and_not_built_p)
24582 {
24583 tree decl, type;
24584
24585 /* Don't define the builtin again. */
24586 ix86_builtins_isa[i].set_and_not_built_p = false;
24587
24588 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
24589 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
24590 type, i, BUILT_IN_MD, NULL,
24591 NULL_TREE);
24592
24593 ix86_builtins[i] = decl;
24594 if (ix86_builtins_isa[i].const_p)
24595 TREE_READONLY (decl) = 1;
24596 }
24597 }
24598 }
24599
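/* For example (a hypothetical call site, shown only to illustrate the
   deferral mechanism), once the active ISA has been extended to include
   AVX,

     ix86_add_new_builtins (OPTION_MASK_ISA_AVX);

   walks the deferred ix86_builtins_isa entries whose isa mask includes
   OPTION_MASK_ISA_AVX and builds their decls at external scope.  */
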
24600 /* Bits for builtin_description.flag. */
24601
24602 /* Set when we don't support the comparison natively, and should
24603    swap the comparison operands in order to support it.  */
24604 #define BUILTIN_DESC_SWAP_OPERANDS 1
24605
24606 struct builtin_description
24607 {
24608 const unsigned int mask;
24609 const enum insn_code icode;
24610 const char *const name;
24611 const enum ix86_builtins code;
24612 const enum rtx_code comparison;
24613 const int flag;
24614 };
24615
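/* As a concrete reading of one entry, the first row of bdesc_comi just
   below,

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
       IX86_BUILTIN_COMIEQSS, UNEQ, 0 }

   describes a builtin that is available when SSE is enabled, expands
   through the sse_comi insn pattern and uses the UNEQ comparison code; its
   flag field is 0, i.e. BUILTIN_DESC_SWAP_OPERANDS is not set.  In the
   bdesc_special_args and bdesc_args tables further below, the same field
   instead carries the (int)-cast ix86_builtin_func_type of the builtin.  */
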
24616 static const struct builtin_description bdesc_comi[] =
24617 {
24618 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
24619 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
24620 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
24621 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
24622 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
24623 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
24624 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
24625 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
24626 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
24627 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
24628 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
24629 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
24630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
24631 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
24632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
24633 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
24634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
24635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
24636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
24637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
24638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
24639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
24640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
24641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
24642 };
24643
24644 static const struct builtin_description bdesc_pcmpestr[] =
24645 {
24646 /* SSE4.2 */
24647 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
24648 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
24649 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
24650 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
24651 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
24652 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
24653 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
24654 };
24655
24656 static const struct builtin_description bdesc_pcmpistr[] =
24657 {
24658 /* SSE4.2 */
24659 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
24660 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
24661 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
24662 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
24663 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
24664 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
24665 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
24666 };
24667
24668 /* Special builtins with variable number of arguments. */
24669 static const struct builtin_description bdesc_special_args[] =
24670 {
24671 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
24672 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
24673
24674 /* MMX */
24675 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24676
24677 /* 3DNow! */
24678 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24679
24680 /* SSE */
24681 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24682 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24683 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24684
24685 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24686 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24687 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24688 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24689
24690 /* SSE or 3DNow!A */
24691 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24692 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
24693
24694 /* SSE2 */
24695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24696 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24697 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
24699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24700 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
24701 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
24702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
24703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24704
24705 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24707
24708 /* SSE3 */
24709 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24710
24711 /* SSE4.1 */
24712 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
24713
24714 /* SSE4A */
24715 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24716 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24717
24718 /* AVX */
24719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
24720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
24721
24722 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24723 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24724 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
24726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
24727
24728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
24734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24735
24736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
24737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24739
24740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
24741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
24742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
24743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
24744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
24745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
24746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
24747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
24748
24749 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
24750 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
24751 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
24752 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
24753 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
24754 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
24755
24756 /* FSGSBASE */
24757 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24758 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24759 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24760 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24761 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24762 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24763 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24764 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24765 };
24766
24767 /* Builtins with variable number of arguments. */
24768 static const struct builtin_description bdesc_args[] =
24769 {
24770 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
24771 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
24772 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
24773 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24774 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24775 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24776 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24777
24778 /* MMX */
24779 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24780 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24781 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24782 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24783 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24784 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24785
24786 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24787 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24788 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24789 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24790 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24791 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24792 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24793 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24794
24795 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24796 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24797
24798 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24799 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24800 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24801 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24802
24803 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24804 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24805 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24806 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24807 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24808 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24809
24810 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24811 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24812 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24813 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24814   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24815   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24816
24817 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24818 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
24819 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24820
24821 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
24822
24823 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24824 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24825 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24826 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24827 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24828 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24829
24830 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24831 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24832 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24833 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24834 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24835 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24836
24837 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24838 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24839 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24840 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24841
24842 /* 3DNow! */
24843 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24844 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24845 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24846 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24847
24848 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24849 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24850 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24851 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24852 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24853 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24854 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24855 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24856 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24857 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24858 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24859 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24860 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24861 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24862 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24863
24864 /* 3DNow!A */
24865 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24866 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24867 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24868 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24869 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24870 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24871
24872 /* SSE */
24873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
24874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24875 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24877 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24879 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24880 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24881 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24882 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24883 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24884 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24885
24886 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24887
24888 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24889 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24890 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24891 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24892 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24893 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24894 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24895 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24896
24897 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24899 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24900 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24901 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24902 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24903 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24904 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24905 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24907 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
24908 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24909 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24910 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24911 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24912 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24913 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24914 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24915 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24916 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24917 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24918 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24919
24920 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24921 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24922 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24923 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24924
24925 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24926 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24927 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24928 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24929
24930 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24931
24932 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24933 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24935 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24936 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24937
24938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
24939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
24940   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
24941
24942 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
24943
24944 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24945 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24946 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24947
24948   /* SSE MMX or 3DNow!A */
24949 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24950 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24951 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24952
24953 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24954 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24955 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24956 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24957
24958 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
24959 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
24960
24961 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
24962
24963 /* SSE2 */
24964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24965
24966 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
24967 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
24968 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
24969 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
24970 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
24971 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24972 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
24973 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
24974 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
24975 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
24976 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
24977 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
24978
24979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
24980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
24981 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
24982 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
24983 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24984 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24985
24986 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24987 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
24989 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24991
24992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
24993
24994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24996 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24997 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24998
24999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
25000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
25001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
25002
25003 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25004 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25005 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25006 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25011
25012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
25013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
25014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
25015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
25017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
25019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
25020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
25021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25023 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
25025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
25026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
25027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
25029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
25030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
25031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25032
25033 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25034 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25037
25038 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25040 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25041 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25042
25043 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25044
25045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25046 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25047 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25048
25049 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
25050
25051 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25052 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25053 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25054 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25055 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25056 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25057 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25058 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25059
25060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25061 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25062 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25063 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25064 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25065 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25066 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25067 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25068
25069 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25070   { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25071
25072 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25074 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25075 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25076
25077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25079
25080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25086
25087 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25088 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25089 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25090 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25091
25092 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25093 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25094 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25095 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25096 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25097 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25098 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25099 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25100
25101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
25102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
25103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
25104
25105 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25106 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
25107
25108 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
25109 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
25110
25111 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
25112
25113 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
25114 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
25115 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
25116 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
25117
25118 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25119 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25120 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25121 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25122 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25123 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25124 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25125
25126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25127 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25128 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25129 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25130 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25131 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25132 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25133
25134 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25135 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25136 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25137 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25138
25139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
25140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25142
25143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
25144
25145 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
25146 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
25147
25148 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25149
25150 /* SSE2 MMX */
25151 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
25152 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
25153
25154 /* SSE3 */
25155   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25156 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25157
25158 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25159 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25160 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25161 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25162 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25163 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25164
25165 /* SSSE3 */
25166 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
25167 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
25168 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25169 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
25170 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
25171 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
25172
25173 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25174 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25175 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25176 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25177 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25178 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25179 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25180 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25181 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25182 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25183 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25184 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25185 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
25186 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
25187 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25188 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25189 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25190 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25191 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25192 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25193 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25194 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25195 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25196 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25197
25198 /* SSSE3. */
25199 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
25200 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
25201
25202 /* SSE4.1 */
25203 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25204 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25205 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
25206 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
25207 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25208 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25209 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25210 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
25211 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
25212 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
25213
25214 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
25215 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
25216 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
25217 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
25218 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
25219 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
25220 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
25221 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
25222 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
25223 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
25224 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
25225 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
25226 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25227
25228 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
25229 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25230 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25231 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25232 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25233 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25234 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25235 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25236 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25237 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25238 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
25239 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25240
25241 /* SSE4.1 */
25242 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
25243 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
25244 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25245 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25246
25247 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
25248 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
25249 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
25250 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
25251
25252 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
25253 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
25254 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
25255 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
25256
25257 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25258 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25259 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25260
25261 /* SSE4.2 */
25262 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25263 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
25264 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
25265 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25266 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25267
25268 /* SSE4A */
25269 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
25270 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
25271 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
25272 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25273
25274 /* AES */
25275 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
25276 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25277
25278 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25279 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25280 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25281 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25282
25283 /* PCLMUL */
25284 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
25285
25286 /* AVX */
25287 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25288 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25289 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25290 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25291 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25292 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25293 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25294 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25295 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25296 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25297 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25298 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25299 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25300 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25301 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25302 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25303 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25304 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25305 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25306 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25307 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25308 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25309 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25310 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25311 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25312 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25313
25314 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
25315 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
25316 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
25317 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
25318
25319 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25320 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25321 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
25322 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
25323 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25324 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25325 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25326 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25327 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25328 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25329 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25330 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25331 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25332 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
25333 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
25334 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
25335 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
25336 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
25337 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
25338 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
25339 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
25340 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
25341 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
25342 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
25343 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25344 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25345 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
25346 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
25347 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
25348 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25349 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
25350 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
25351 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
25352 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
25353
25354 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25355 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25356 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
25357
25358 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
25359 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25360 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25361 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25362 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25363
25364 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25365
25366 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25367 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
25368
25369 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
25370 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
25371 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
25372 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
25373
25374 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
25375 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
25376 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
25377 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
25378
25379 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25380 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25381 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25382 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25383
25384 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
25385 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
25386 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
25387 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
25388 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
25389 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
25390
25391 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25392 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25393 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25394 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25395 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25396 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25397 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25398 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25399 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25400 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25401 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25402 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25403 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25404 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25405 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25406
25407 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
25408 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
25409
25410 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25411 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25412
25413 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
25414
25415 /* BMI */
25416 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25417 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25418 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
25419
25420 /* TBM */
25421 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25422 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25423
25424 /* F16C */
25425 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
25426 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
25427 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
25428 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
25429 };
25430
25431 /* FMA4 and XOP. */
25432 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
25433 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
25434 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
25435 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
25436 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
25437 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
25438 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
25439 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
25440 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
25441 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
25442 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
25443 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
25444 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
25445 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
25446 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
25447 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
25448 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
25449 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
25450 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
25451 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
25452 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
25453 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
25454 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
25455 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
25456 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
25457 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
25458 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
25459 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
25460 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
25461 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
25462 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
25463 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
25464 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
25465 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
25466 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
25467 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
25468 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
25469 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
25470 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
25471 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
25472 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
25473 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
25474 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
25475 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
25476 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
25477 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
25478 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
25479 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
25480 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
25481 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
25482 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
25483 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
25484
25485 static const struct builtin_description bdesc_multi_arg[] =
25486 {
25487 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
25488 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
25489 UNKNOWN, (int)MULTI_ARG_3_SF },
25490 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
25491 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
25492 UNKNOWN, (int)MULTI_ARG_3_DF },
25493
25494 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
25495 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
25496 UNKNOWN, (int)MULTI_ARG_3_SF },
25497 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
25498 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
25499 UNKNOWN, (int)MULTI_ARG_3_DF },
25500 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
25501 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
25502 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25503 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
25504 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
25505 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25506
25507 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
25508 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
25509 UNKNOWN, (int)MULTI_ARG_3_SF },
25510 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
25511 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
25512 UNKNOWN, (int)MULTI_ARG_3_DF },
25513 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
25514 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
25515 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25516 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
25517 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
25518 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25519
25520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
25521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
25522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
25523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
25524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
25525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
25526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
25527
25528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
25531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
25532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
25533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
25534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
25535
25536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
25537
25538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25550
25551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
25553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
25554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
25555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
25556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
25557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
25558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
25559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
25561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
25562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
25563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
25565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
25566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
25567
25568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
25569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
25570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
25571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
25572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
25573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
25574
25575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
25577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
25578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
25580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
25583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
25584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
25586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25590
25591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
25592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
25595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
25596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
25597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
25598
25599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
25600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
25603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
25604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
25605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
25606
25607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
25608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
25611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
25612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
25613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
25614
25615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
25619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
25620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
25621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
25622
25623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
25624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
25627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
25628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
25629 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
25630
25631 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
25632 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25633 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
25635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
25636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
25637 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
25638
25639 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
25640 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
25643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
25644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
25645 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
25646
25647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
25651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
25652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
25653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
25654
25655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25656 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25657 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25658 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25659 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25660 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25661 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25662 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25663
25664 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25665 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25666 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25667 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25668 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25669 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25670 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25671 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25672
25673 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25674 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25675 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25676 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
25677
25678 };
25679
25680 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
25681 not in the current target ISA, so that the user can compile particular
25682 modules with target-specific options that differ from the command-line
25683 options.  (An illustrative sketch follows this function.)  */
25684 static void
25685 ix86_init_mmx_sse_builtins (void)
25686 {
25687 const struct builtin_description * d;
25688 enum ix86_builtin_func_type ftype;
25689 size_t i;
25690
25691 /* Add all special builtins with variable number of operands. */
25692 for (i = 0, d = bdesc_special_args;
25693 i < ARRAY_SIZE (bdesc_special_args);
25694 i++, d++)
25695 {
25696 if (d->name == 0)
25697 continue;
25698
25699 ftype = (enum ix86_builtin_func_type) d->flag;
25700 def_builtin (d->mask, d->name, ftype, d->code);
25701 }
25702
25703 /* Add all builtins with variable number of operands. */
25704 for (i = 0, d = bdesc_args;
25705 i < ARRAY_SIZE (bdesc_args);
25706 i++, d++)
25707 {
25708 if (d->name == 0)
25709 continue;
25710
25711 ftype = (enum ix86_builtin_func_type) d->flag;
25712 def_builtin_const (d->mask, d->name, ftype, d->code);
25713 }
25714
25715 /* pcmpestr[im] insns. */
25716 for (i = 0, d = bdesc_pcmpestr;
25717 i < ARRAY_SIZE (bdesc_pcmpestr);
25718 i++, d++)
25719 {
25720 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25721 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25722 else
25723 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25724 def_builtin_const (d->mask, d->name, ftype, d->code);
25725 }
25726
25727 /* pcmpistr[im] insns. */
25728 for (i = 0, d = bdesc_pcmpistr;
25729 i < ARRAY_SIZE (bdesc_pcmpistr);
25730 i++, d++)
25731 {
25732 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25733 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25734 else
25735 ftype = INT_FTYPE_V16QI_V16QI_INT;
25736 def_builtin_const (d->mask, d->name, ftype, d->code);
25737 }
25738
25739 /* comi/ucomi insns. */
25740 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25741 {
25742 if (d->mask == OPTION_MASK_ISA_SSE2)
25743 ftype = INT_FTYPE_V2DF_V2DF;
25744 else
25745 ftype = INT_FTYPE_V4SF_V4SF;
25746 def_builtin_const (d->mask, d->name, ftype, d->code);
25747 }
25748
25749 /* SSE */
25750 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25751 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25752 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25753 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25754
25755 /* SSE or 3DNow!A */
25756 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25757 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25758 IX86_BUILTIN_MASKMOVQ);
25759
25760 /* SSE2 */
25761 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25762 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25763
25764 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25765 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25766 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25767 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25768
25769 /* SSE3. */
25770 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25771 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25772 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25773 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
25774
25775 /* AES */
25776 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25777 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25778 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25779 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25780 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25781 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25782 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25783 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25784 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25785 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25786 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25787 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25788
25789 /* PCLMUL */
25790 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25791 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25792
25793 /* RDRND */
25794 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25795 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25796 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25797 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25798 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25799 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25800 IX86_BUILTIN_RDRAND64_STEP);
25801
25802 /* MMX access to the vec_init patterns. */
25803 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25804 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25805
25806 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25807 V4HI_FTYPE_HI_HI_HI_HI,
25808 IX86_BUILTIN_VEC_INIT_V4HI);
25809
25810 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25811 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25812 IX86_BUILTIN_VEC_INIT_V8QI);
25813
25814 /* Access to the vec_extract patterns. */
25815 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25816 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25817 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25818 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25819 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25820 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25821 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25822 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25823 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25824 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25825
25826 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25827 "__builtin_ia32_vec_ext_v4hi",
25828 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25829
25830 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25831 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25832
25833 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25834 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
25835
25836 /* Access to the vec_set patterns. */
25837 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25838 "__builtin_ia32_vec_set_v2di",
25839 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25840
25841 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25842 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25843
25844 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25845 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25846
25847 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25848 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25849
25850 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25851 "__builtin_ia32_vec_set_v4hi",
25852 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25853
25854 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25855 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
25856
25857 /* Add FMA4 and XOP multi-arg builtin instructions.  */
25858 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25859 {
25860 if (d->name == 0)
25861 continue;
25862
25863 ftype = (enum ix86_builtin_func_type) d->flag;
25864 def_builtin_const (d->mask, d->name, ftype, d->code);
25865 }
25866 }
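/* Illustrative sketch, not part of GCC itself: because the loops above
   register every builtin regardless of the command-line ISA, a translation
   unit compiled without -mavx can still use an AVX builtin inside a function
   that carries its own target option.  The typedef and wrapper names below
   are made up for the example.  */
#if 0
typedef float example_v8sf __attribute__ ((__vector_size__ (32)));

__attribute__ ((__target__ ("avx")))
static example_v8sf
example_sqrtps256 (example_v8sf x)
{
  /* __builtin_ia32_sqrtps256 is registered above with signature
     V8SF_FTYPE_V8SF; the target attribute enables AVX for this
     function only.  */
  return __builtin_ia32_sqrtps256 (x);
}
#endif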
25867
25868 /* Internal helper for ix86_init_builtins.  */
25869
25870 static void
25871 ix86_init_builtins_va_builtins_abi (void)
25872 {
25873 tree ms_va_ref, sysv_va_ref;
25874 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25875 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25876 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25877 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25878
25879 if (!TARGET_64BIT)
25880 return;
25881 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25882 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25883 ms_va_ref = build_reference_type (ms_va_list_type_node);
25884 sysv_va_ref =
25885 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25886
25887 fnvoid_va_end_ms =
25888 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25889 fnvoid_va_start_ms =
25890 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25891 fnvoid_va_end_sysv =
25892 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25893 fnvoid_va_start_sysv =
25894 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25895 NULL_TREE);
25896 fnvoid_va_copy_ms =
25897 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25898 NULL_TREE);
25899 fnvoid_va_copy_sysv =
25900 build_function_type_list (void_type_node, sysv_va_ref,
25901 sysv_va_ref, NULL_TREE);
25902
25903 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25904 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25905 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25906 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25907 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25908 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25909 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25910 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25911 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25912 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25913 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25914 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25915 }
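/* Illustrative sketch, not part of GCC itself: the builtins registered above
   back the __builtin_ms_va_* machinery used by ms_abi varargs functions on
   x86-64.  The function below is hypothetical, and assumes the
   __builtin_ms_va_list type name is exposed on 64-bit targets.  */
#if 0
__attribute__ ((ms_abi))
static void
example_ms_varargs (const char *fmt, ...)
{
  __builtin_ms_va_list ap;

  __builtin_ms_va_start (ap, fmt);
  /* ... fetch arguments with va_arg as usual ... */
  __builtin_ms_va_end (ap);
}
#endif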
25916
25917 static void
25918 ix86_init_builtin_types (void)
25919 {
25920 tree float128_type_node, float80_type_node;
25921
25922 /* The __float80 type. */
25923 float80_type_node = long_double_type_node;
25924 if (TYPE_MODE (float80_type_node) != XFmode)
25925 {
25926 /* long double does not use XFmode here; build a separate 80-bit type.  */
25927 float80_type_node = make_node (REAL_TYPE);
25928
25929 TYPE_PRECISION (float80_type_node) = 80;
25930 layout_type (float80_type_node);
25931 }
25932 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25933
25934 /* The __float128 type. */
25935 float128_type_node = make_node (REAL_TYPE);
25936 TYPE_PRECISION (float128_type_node) = 128;
25937 layout_type (float128_type_node);
25938 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25939
25940 /* This macro is built by i386-builtin-types.awk. */
25941 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25942 }
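/* Illustrative sketch, not part of GCC itself: once registered above, the
   __float80 and __float128 type names are directly usable in C code on x86
   targets.  The function name is hypothetical.  */
#if 0
static __float128
example_half_of (__float128 x)
{
  return x / 2;
}
#endif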
25943
25944 static void
25945 ix86_init_builtins (void)
25946 {
25947 tree t;
25948
25949 ix86_init_builtin_types ();
25950
25951 /* TFmode support builtins. */
25952 def_builtin_const (0, "__builtin_infq",
25953 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25954 def_builtin_const (0, "__builtin_huge_valq",
25955 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25956
25957 /* We will expand them to a normal call if SSE2 isn't available, since
25958 they are used by libgcc.  */
25959 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25960 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25961 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25962 TREE_READONLY (t) = 1;
25963 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25964
25965 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25966 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25967 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25968 TREE_READONLY (t) = 1;
25969 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
25970
25971 ix86_init_mmx_sse_builtins ();
25972
25973 if (TARGET_64BIT)
25974 ix86_init_builtins_va_builtins_abi ();
25975
25976 #ifdef SUBTARGET_INIT_BUILTINS
25977 SUBTARGET_INIT_BUILTINS;
25978 #endif
25979 }
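/* Illustrative sketch, not part of GCC itself: the TFmode builtins defined
   above can be called directly; with SSE2 the operation expands inline,
   otherwise it becomes a library call (e.g. __fabstf2 in libgcc), as noted
   in the comment above.  The wrapper name is hypothetical.  */
#if 0
static __float128
example_magnitude (__float128 x)
{
  return __builtin_fabsq (x);
}
#endif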
25980
25981 /* Return the ix86 builtin for CODE. */
25982
25983 static tree
25984 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25985 {
25986 if (code >= IX86_BUILTIN_MAX)
25987 return error_mark_node;
25988
25989 return ix86_builtins[code];
25990 }
25991
25992 /* Errors in the source file can cause expand_expr to return const0_rtx
25993 where we expect a vector. To avoid crashing, use one of the vector
25994 clear instructions. */
25995 static rtx
25996 safe_vector_operand (rtx x, enum machine_mode mode)
25997 {
25998 if (x == const0_rtx)
25999 x = CONST0_RTX (mode);
26000 return x;
26001 }
26002
26003 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
26004
26005 static rtx
26006 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
26007 {
26008 rtx pat;
26009 tree arg0 = CALL_EXPR_ARG (exp, 0);
26010 tree arg1 = CALL_EXPR_ARG (exp, 1);
26011 rtx op0 = expand_normal (arg0);
26012 rtx op1 = expand_normal (arg1);
26013 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26014 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26015 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
26016
26017 if (VECTOR_MODE_P (mode0))
26018 op0 = safe_vector_operand (op0, mode0);
26019 if (VECTOR_MODE_P (mode1))
26020 op1 = safe_vector_operand (op1, mode1);
26021
26022 if (optimize || !target
26023 || GET_MODE (target) != tmode
26024 || !insn_data[icode].operand[0].predicate (target, tmode))
26025 target = gen_reg_rtx (tmode);
26026
26027 if (GET_MODE (op1) == SImode && mode1 == TImode)
26028 {
26029 rtx x = gen_reg_rtx (V4SImode);
26030 emit_insn (gen_sse2_loadd (x, op1));
26031 op1 = gen_lowpart (TImode, x);
26032 }
26033
26034 if (!insn_data[icode].operand[1].predicate (op0, mode0))
26035 op0 = copy_to_mode_reg (mode0, op0);
26036 if (!insn_data[icode].operand[2].predicate (op1, mode1))
26037 op1 = copy_to_mode_reg (mode1, op1);
26038
26039 pat = GEN_FCN (icode) (target, op0, op1);
26040 if (! pat)
26041 return 0;
26042
26043 emit_insn (pat);
26044
26045 return target;
26046 }
26047
26048 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
26049
26050 static rtx
26051 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
26052 enum ix86_builtin_func_type m_type,
26053 enum rtx_code sub_code)
26054 {
26055 rtx pat;
26056 int i;
26057 int nargs;
26058 bool comparison_p = false;
26059 bool tf_p = false;
26060 bool last_arg_constant = false;
26061 int num_memory = 0;
26062 struct {
26063 rtx op;
26064 enum machine_mode mode;
26065 } args[4];
26066
26067 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26068
26069 switch (m_type)
26070 {
26071 case MULTI_ARG_4_DF2_DI_I:
26072 case MULTI_ARG_4_DF2_DI_I1:
26073 case MULTI_ARG_4_SF2_SI_I:
26074 case MULTI_ARG_4_SF2_SI_I1:
26075 nargs = 4;
26076 last_arg_constant = true;
26077 break;
26078
26079 case MULTI_ARG_3_SF:
26080 case MULTI_ARG_3_DF:
26081 case MULTI_ARG_3_SF2:
26082 case MULTI_ARG_3_DF2:
26083 case MULTI_ARG_3_DI:
26084 case MULTI_ARG_3_SI:
26085 case MULTI_ARG_3_SI_DI:
26086 case MULTI_ARG_3_HI:
26087 case MULTI_ARG_3_HI_SI:
26088 case MULTI_ARG_3_QI:
26089 case MULTI_ARG_3_DI2:
26090 case MULTI_ARG_3_SI2:
26091 case MULTI_ARG_3_HI2:
26092 case MULTI_ARG_3_QI2:
26093 nargs = 3;
26094 break;
26095
26096 case MULTI_ARG_2_SF:
26097 case MULTI_ARG_2_DF:
26098 case MULTI_ARG_2_DI:
26099 case MULTI_ARG_2_SI:
26100 case MULTI_ARG_2_HI:
26101 case MULTI_ARG_2_QI:
26102 nargs = 2;
26103 break;
26104
26105 case MULTI_ARG_2_DI_IMM:
26106 case MULTI_ARG_2_SI_IMM:
26107 case MULTI_ARG_2_HI_IMM:
26108 case MULTI_ARG_2_QI_IMM:
26109 nargs = 2;
26110 last_arg_constant = true;
26111 break;
26112
26113 case MULTI_ARG_1_SF:
26114 case MULTI_ARG_1_DF:
26115 case MULTI_ARG_1_SF2:
26116 case MULTI_ARG_1_DF2:
26117 case MULTI_ARG_1_DI:
26118 case MULTI_ARG_1_SI:
26119 case MULTI_ARG_1_HI:
26120 case MULTI_ARG_1_QI:
26121 case MULTI_ARG_1_SI_DI:
26122 case MULTI_ARG_1_HI_DI:
26123 case MULTI_ARG_1_HI_SI:
26124 case MULTI_ARG_1_QI_DI:
26125 case MULTI_ARG_1_QI_SI:
26126 case MULTI_ARG_1_QI_HI:
26127 nargs = 1;
26128 break;
26129
26130 case MULTI_ARG_2_DI_CMP:
26131 case MULTI_ARG_2_SI_CMP:
26132 case MULTI_ARG_2_HI_CMP:
26133 case MULTI_ARG_2_QI_CMP:
26134 nargs = 2;
26135 comparison_p = true;
26136 break;
26137
26138 case MULTI_ARG_2_SF_TF:
26139 case MULTI_ARG_2_DF_TF:
26140 case MULTI_ARG_2_DI_TF:
26141 case MULTI_ARG_2_SI_TF:
26142 case MULTI_ARG_2_HI_TF:
26143 case MULTI_ARG_2_QI_TF:
26144 nargs = 2;
26145 tf_p = true;
26146 break;
26147
26148 default:
26149 gcc_unreachable ();
26150 }
26151
26152 if (optimize || !target
26153 || GET_MODE (target) != tmode
26154 || !insn_data[icode].operand[0].predicate (target, tmode))
26155 target = gen_reg_rtx (tmode);
26156
26157 gcc_assert (nargs <= 4);
26158
26159 for (i = 0; i < nargs; i++)
26160 {
26161 tree arg = CALL_EXPR_ARG (exp, i);
26162 rtx op = expand_normal (arg);
26163 int adjust = (comparison_p) ? 1 : 0;
26164 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
26165
26166 if (last_arg_constant && i == nargs-1)
26167 {
26168 if (!CONST_INT_P (op))
26169 {
26170 error ("last argument must be an immediate");
26171 return gen_reg_rtx (tmode);
26172 }
26173 }
26174 else
26175 {
26176 if (VECTOR_MODE_P (mode))
26177 op = safe_vector_operand (op, mode);
26178
26179 /* If we aren't optimizing, only allow one memory operand to be
26180 generated. */
26181 if (memory_operand (op, mode))
26182 num_memory++;
26183
26184 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
26185
26186 if (optimize
26187 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
26188 || num_memory > 1)
26189 op = force_reg (mode, op);
26190 }
26191
26192 args[i].op = op;
26193 args[i].mode = mode;
26194 }
26195
26196 switch (nargs)
26197 {
26198 case 1:
26199 pat = GEN_FCN (icode) (target, args[0].op);
26200 break;
26201
26202 case 2:
26203 if (tf_p)
26204 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
26205 GEN_INT ((int)sub_code));
26206 else if (! comparison_p)
26207 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26208 else
26209 {
26210 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
26211 args[0].op,
26212 args[1].op);
26213
26214 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
26215 }
26216 break;
26217
26218 case 3:
26219 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26220 break;
26221
26222 case 4:
26223 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
26224 break;
26225
26226 default:
26227 gcc_unreachable ();
26228 }
26229
26230 if (! pat)
26231 return 0;
26232
26233 emit_insn (pat);
26234 return target;
26235 }
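/* Illustrative sketch, not part of GCC itself: builtins classified with a
   MULTI_ARG_*_IMM (or MULTI_ARG_4_*) type above require a compile-time
   constant in the last position; a runtime value there triggers the
   "last argument must be an immediate" error emitted by this expander.
   The typedef and wrapper below are hypothetical, and assume the target
   attribute accepts "xop".  */
#if 0
typedef long long example_v2di __attribute__ ((__vector_size__ (16)));

__attribute__ ((__target__ ("xop")))
static example_v2di
example_rotate_left_3 (example_v2di x)
{
  /* __builtin_ia32_vprotqi is registered above as MULTI_ARG_2_DI_IMM
     (V2DI_FTYPE_V2DI_SI); the 3 must be a literal constant.  */
  return __builtin_ia32_vprotqi (x, 3);
}
#endif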
26236
26237 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
26238 insns with vec_merge. */
26239
26240 static rtx
26241 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
26242 rtx target)
26243 {
26244 rtx pat;
26245 tree arg0 = CALL_EXPR_ARG (exp, 0);
26246 rtx op1, op0 = expand_normal (arg0);
26247 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26248 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26249
26250 if (optimize || !target
26251 || GET_MODE (target) != tmode
26252 || !insn_data[icode].operand[0].predicate (target, tmode))
26253 target = gen_reg_rtx (tmode);
26254
26255 if (VECTOR_MODE_P (mode0))
26256 op0 = safe_vector_operand (op0, mode0);
26257
26258 if ((optimize && !register_operand (op0, mode0))
26259 || !insn_data[icode].operand[1].predicate (op0, mode0))
26260 op0 = copy_to_mode_reg (mode0, op0);
26261
26262 op1 = op0;
26263 if (!insn_data[icode].operand[2].predicate (op1, mode0))
26264 op1 = copy_to_mode_reg (mode0, op1);
26265
26266 pat = GEN_FCN (icode) (target, op0, op1);
26267 if (! pat)
26268 return 0;
26269 emit_insn (pat);
26270 return target;
26271 }
26272
26273 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
26274
26275 static rtx
26276 ix86_expand_sse_compare (const struct builtin_description *d,
26277 tree exp, rtx target, bool swap)
26278 {
26279 rtx pat;
26280 tree arg0 = CALL_EXPR_ARG (exp, 0);
26281 tree arg1 = CALL_EXPR_ARG (exp, 1);
26282 rtx op0 = expand_normal (arg0);
26283 rtx op1 = expand_normal (arg1);
26284 rtx op2;
26285 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26286 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26287 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
26288 enum rtx_code comparison = d->comparison;
26289
26290 if (VECTOR_MODE_P (mode0))
26291 op0 = safe_vector_operand (op0, mode0);
26292 if (VECTOR_MODE_P (mode1))
26293 op1 = safe_vector_operand (op1, mode1);
26294
26295 /* Swap operands if we have a comparison that isn't available in
26296 hardware. */
26297 if (swap)
26298 {
26299 rtx tmp = gen_reg_rtx (mode1);
26300 emit_move_insn (tmp, op1);
26301 op1 = op0;
26302 op0 = tmp;
26303 }
26304
26305 if (optimize || !target
26306 || GET_MODE (target) != tmode
26307 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26308 target = gen_reg_rtx (tmode);
26309
26310 if ((optimize && !register_operand (op0, mode0))
26311 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
26312 op0 = copy_to_mode_reg (mode0, op0);
26313 if ((optimize && !register_operand (op1, mode1))
26314 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
26315 op1 = copy_to_mode_reg (mode1, op1);
26316
26317 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
26318 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
26319 if (! pat)
26320 return 0;
26321 emit_insn (pat);
26322 return target;
26323 }
26324
26325 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
26326
26327 static rtx
26328 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
26329 rtx target)
26330 {
26331 rtx pat;
26332 tree arg0 = CALL_EXPR_ARG (exp, 0);
26333 tree arg1 = CALL_EXPR_ARG (exp, 1);
26334 rtx op0 = expand_normal (arg0);
26335 rtx op1 = expand_normal (arg1);
26336 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26337 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26338 enum rtx_code comparison = d->comparison;
26339
26340 if (VECTOR_MODE_P (mode0))
26341 op0 = safe_vector_operand (op0, mode0);
26342 if (VECTOR_MODE_P (mode1))
26343 op1 = safe_vector_operand (op1, mode1);
26344
26345 /* Swap operands if we have a comparison that isn't available in
26346 hardware. */
26347 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
26348 {
26349 rtx tmp = op1;
26350 op1 = op0;
26351 op0 = tmp;
26352 }
26353
26354 target = gen_reg_rtx (SImode);
26355 emit_move_insn (target, const0_rtx);
26356 target = gen_rtx_SUBREG (QImode, target, 0);
26357
26358 if ((optimize && !register_operand (op0, mode0))
26359 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26360 op0 = copy_to_mode_reg (mode0, op0);
26361 if ((optimize && !register_operand (op1, mode1))
26362 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26363 op1 = copy_to_mode_reg (mode1, op1);
26364
26365 pat = GEN_FCN (d->icode) (op0, op1);
26366 if (! pat)
26367 return 0;
26368 emit_insn (pat);
26369 emit_insn (gen_rtx_SET (VOIDmode,
26370 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26371 gen_rtx_fmt_ee (comparison, QImode,
26372 SET_DEST (pat),
26373 const0_rtx)));
26374
26375 return SUBREG_REG (target);
26376 }
26377
26378 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
26379
26380 static rtx
26381 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
26382 rtx target)
26383 {
26384 rtx pat;
26385 tree arg0 = CALL_EXPR_ARG (exp, 0);
26386 rtx op1, op0 = expand_normal (arg0);
26387 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26388 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26389
26390 if (optimize || target == 0
26391 || GET_MODE (target) != tmode
26392 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26393 target = gen_reg_rtx (tmode);
26394
26395 if (VECTOR_MODE_P (mode0))
26396 op0 = safe_vector_operand (op0, mode0);
26397
26398 if ((optimize && !register_operand (op0, mode0))
26399 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26400 op0 = copy_to_mode_reg (mode0, op0);
26401
26402 op1 = GEN_INT (d->comparison);
26403
26404 pat = GEN_FCN (d->icode) (target, op0, op1);
26405 if (! pat)
26406 return 0;
26407 emit_insn (pat);
26408 return target;
26409 }
26410
26411 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
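/* Illustrative note (an assumption, not taken from the original sources):
   the ptest builtins, e.g. __builtin_ia32_ptestz128 behind _mm_testz_si128,
   only set EFLAGS; the int result is again read back from the flags through
   the zeroed-SImode / STRICT_LOW_PART sequence below. */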
26412
26413 static rtx
26414 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
26415 rtx target)
26416 {
26417 rtx pat;
26418 tree arg0 = CALL_EXPR_ARG (exp, 0);
26419 tree arg1 = CALL_EXPR_ARG (exp, 1);
26420 rtx op0 = expand_normal (arg0);
26421 rtx op1 = expand_normal (arg1);
26422 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26423 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26424 enum rtx_code comparison = d->comparison;
26425
26426 if (VECTOR_MODE_P (mode0))
26427 op0 = safe_vector_operand (op0, mode0);
26428 if (VECTOR_MODE_P (mode1))
26429 op1 = safe_vector_operand (op1, mode1);
26430
26431 target = gen_reg_rtx (SImode);
26432 emit_move_insn (target, const0_rtx);
26433 target = gen_rtx_SUBREG (QImode, target, 0);
26434
26435 if ((optimize && !register_operand (op0, mode0))
26436 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26437 op0 = copy_to_mode_reg (mode0, op0);
26438 if ((optimize && !register_operand (op1, mode1))
26439 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26440 op1 = copy_to_mode_reg (mode1, op1);
26441
26442 pat = GEN_FCN (d->icode) (op0, op1);
26443 if (! pat)
26444 return 0;
26445 emit_insn (pat);
26446 emit_insn (gen_rtx_SET (VOIDmode,
26447 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26448 gen_rtx_fmt_ee (comparison, QImode,
26449 SET_DEST (pat),
26450 const0_rtx)));
26451
26452 return SUBREG_REG (target);
26453 }
26454
26455 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
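/* Depending on d->code the result is either the index (PCMPESTRI, an
   integer target of mode tmode0), the mask (PCMPESTRM, a vector target of
   mode tmode1), or, for the remaining variants, a single condition flag;
   in that last case d->flag holds the machine mode of the flags register
   copy that is tested against zero below. */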
26456
26457 static rtx
26458 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26459 tree exp, rtx target)
26460 {
26461 rtx pat;
26462 tree arg0 = CALL_EXPR_ARG (exp, 0);
26463 tree arg1 = CALL_EXPR_ARG (exp, 1);
26464 tree arg2 = CALL_EXPR_ARG (exp, 2);
26465 tree arg3 = CALL_EXPR_ARG (exp, 3);
26466 tree arg4 = CALL_EXPR_ARG (exp, 4);
26467 rtx scratch0, scratch1;
26468 rtx op0 = expand_normal (arg0);
26469 rtx op1 = expand_normal (arg1);
26470 rtx op2 = expand_normal (arg2);
26471 rtx op3 = expand_normal (arg3);
26472 rtx op4 = expand_normal (arg4);
26473 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26474
26475 tmode0 = insn_data[d->icode].operand[0].mode;
26476 tmode1 = insn_data[d->icode].operand[1].mode;
26477 modev2 = insn_data[d->icode].operand[2].mode;
26478 modei3 = insn_data[d->icode].operand[3].mode;
26479 modev4 = insn_data[d->icode].operand[4].mode;
26480 modei5 = insn_data[d->icode].operand[5].mode;
26481 modeimm = insn_data[d->icode].operand[6].mode;
26482
26483 if (VECTOR_MODE_P (modev2))
26484 op0 = safe_vector_operand (op0, modev2);
26485 if (VECTOR_MODE_P (modev4))
26486 op2 = safe_vector_operand (op2, modev4);
26487
26488 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26489 op0 = copy_to_mode_reg (modev2, op0);
26490 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26491 op1 = copy_to_mode_reg (modei3, op1);
26492 if ((optimize && !register_operand (op2, modev4))
26493 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26494 op2 = copy_to_mode_reg (modev4, op2);
26495 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26496 op3 = copy_to_mode_reg (modei5, op3);
26497
26498 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26499 {
26500 error ("the fifth argument must be a 8-bit immediate");
26501 return const0_rtx;
26502 }
26503
26504 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26505 {
26506 if (optimize || !target
26507 || GET_MODE (target) != tmode0
26508 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26509 target = gen_reg_rtx (tmode0);
26510
26511 scratch1 = gen_reg_rtx (tmode1);
26512
26513 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26514 }
26515 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26516 {
26517 if (optimize || !target
26518 || GET_MODE (target) != tmode1
26519 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26520 target = gen_reg_rtx (tmode1);
26521
26522 scratch0 = gen_reg_rtx (tmode0);
26523
26524 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26525 }
26526 else
26527 {
26528 gcc_assert (d->flag);
26529
26530 scratch0 = gen_reg_rtx (tmode0);
26531 scratch1 = gen_reg_rtx (tmode1);
26532
26533 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26534 }
26535
26536 if (! pat)
26537 return 0;
26538
26539 emit_insn (pat);
26540
26541 if (d->flag)
26542 {
26543 target = gen_reg_rtx (SImode);
26544 emit_move_insn (target, const0_rtx);
26545 target = gen_rtx_SUBREG (QImode, target, 0);
26546
26547 emit_insn
26548 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26549 gen_rtx_fmt_ee (EQ, QImode,
26550 gen_rtx_REG ((enum machine_mode) d->flag,
26551 FLAGS_REG),
26552 const0_rtx)));
26553 return SUBREG_REG (target);
26554 }
26555 else
26556 return target;
26557 }
26558
26559
26560 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26561
26562 static rtx
26563 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26564 tree exp, rtx target)
26565 {
26566 rtx pat;
26567 tree arg0 = CALL_EXPR_ARG (exp, 0);
26568 tree arg1 = CALL_EXPR_ARG (exp, 1);
26569 tree arg2 = CALL_EXPR_ARG (exp, 2);
26570 rtx scratch0, scratch1;
26571 rtx op0 = expand_normal (arg0);
26572 rtx op1 = expand_normal (arg1);
26573 rtx op2 = expand_normal (arg2);
26574 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26575
26576 tmode0 = insn_data[d->icode].operand[0].mode;
26577 tmode1 = insn_data[d->icode].operand[1].mode;
26578 modev2 = insn_data[d->icode].operand[2].mode;
26579 modev3 = insn_data[d->icode].operand[3].mode;
26580 modeimm = insn_data[d->icode].operand[4].mode;
26581
26582 if (VECTOR_MODE_P (modev2))
26583 op0 = safe_vector_operand (op0, modev2);
26584 if (VECTOR_MODE_P (modev3))
26585 op1 = safe_vector_operand (op1, modev3);
26586
26587 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26588 op0 = copy_to_mode_reg (modev2, op0);
26589 if ((optimize && !register_operand (op1, modev3))
26590 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26591 op1 = copy_to_mode_reg (modev3, op1);
26592
26593 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26594 {
26595 error ("the third argument must be a 8-bit immediate");
26596 return const0_rtx;
26597 }
26598
26599 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26600 {
26601 if (optimize || !target
26602 || GET_MODE (target) != tmode0
26603 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26604 target = gen_reg_rtx (tmode0);
26605
26606 scratch1 = gen_reg_rtx (tmode1);
26607
26608 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26609 }
26610 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26611 {
26612 if (optimize || !target
26613 || GET_MODE (target) != tmode1
26614 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26615 target = gen_reg_rtx (tmode1);
26616
26617 scratch0 = gen_reg_rtx (tmode0);
26618
26619 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26620 }
26621 else
26622 {
26623 gcc_assert (d->flag);
26624
26625 scratch0 = gen_reg_rtx (tmode0);
26626 scratch1 = gen_reg_rtx (tmode1);
26627
26628 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26629 }
26630
26631 if (! pat)
26632 return 0;
26633
26634 emit_insn (pat);
26635
26636 if (d->flag)
26637 {
26638 target = gen_reg_rtx (SImode);
26639 emit_move_insn (target, const0_rtx);
26640 target = gen_rtx_SUBREG (QImode, target, 0);
26641
26642 emit_insn
26643 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26644 gen_rtx_fmt_ee (EQ, QImode,
26645 gen_rtx_REG ((enum machine_mode) d->flag,
26646 FLAGS_REG),
26647 const0_rtx)));
26648 return SUBREG_REG (target);
26649 }
26650 else
26651 return target;
26652 }
26653
26654 /* Subroutine of ix86_expand_builtin to take care of insns with
26655 variable number of operands. */
26656
26657 static rtx
26658 ix86_expand_args_builtin (const struct builtin_description *d,
26659 tree exp, rtx target)
26660 {
26661 rtx pat, real_target;
26662 unsigned int i, nargs;
26663 unsigned int nargs_constant = 0;
26664 int num_memory = 0;
26665 struct
26666 {
26667 rtx op;
26668 enum machine_mode mode;
26669 } args[4];
26670 bool last_arg_count = false;
26671 enum insn_code icode = d->icode;
26672 const struct insn_data_d *insn_p = &insn_data[icode];
26673 enum machine_mode tmode = insn_p->operand[0].mode;
26674 enum machine_mode rmode = VOIDmode;
26675 bool swap = false;
26676 enum rtx_code comparison = d->comparison;
26677
26678 switch ((enum ix86_builtin_func_type) d->flag)
26679 {
26680 case V2DF_FTYPE_V2DF_ROUND:
26681 case V4DF_FTYPE_V4DF_ROUND:
26682 case V4SF_FTYPE_V4SF_ROUND:
26683 case V8SF_FTYPE_V8SF_ROUND:
26684 return ix86_expand_sse_round (d, exp, target);
26685 case INT_FTYPE_V8SF_V8SF_PTEST:
26686 case INT_FTYPE_V4DI_V4DI_PTEST:
26687 case INT_FTYPE_V4DF_V4DF_PTEST:
26688 case INT_FTYPE_V4SF_V4SF_PTEST:
26689 case INT_FTYPE_V2DI_V2DI_PTEST:
26690 case INT_FTYPE_V2DF_V2DF_PTEST:
26691 return ix86_expand_sse_ptest (d, exp, target);
26692 case FLOAT128_FTYPE_FLOAT128:
26693 case FLOAT_FTYPE_FLOAT:
26694 case INT_FTYPE_INT:
26695 case UINT64_FTYPE_INT:
26696 case UINT16_FTYPE_UINT16:
26697 case INT64_FTYPE_INT64:
26698 case INT64_FTYPE_V4SF:
26699 case INT64_FTYPE_V2DF:
26700 case INT_FTYPE_V16QI:
26701 case INT_FTYPE_V8QI:
26702 case INT_FTYPE_V8SF:
26703 case INT_FTYPE_V4DF:
26704 case INT_FTYPE_V4SF:
26705 case INT_FTYPE_V2DF:
26706 case V16QI_FTYPE_V16QI:
26707 case V8SI_FTYPE_V8SF:
26708 case V8SI_FTYPE_V4SI:
26709 case V8HI_FTYPE_V8HI:
26710 case V8HI_FTYPE_V16QI:
26711 case V8QI_FTYPE_V8QI:
26712 case V8SF_FTYPE_V8SF:
26713 case V8SF_FTYPE_V8SI:
26714 case V8SF_FTYPE_V4SF:
26715 case V8SF_FTYPE_V8HI:
26716 case V4SI_FTYPE_V4SI:
26717 case V4SI_FTYPE_V16QI:
26718 case V4SI_FTYPE_V4SF:
26719 case V4SI_FTYPE_V8SI:
26720 case V4SI_FTYPE_V8HI:
26721 case V4SI_FTYPE_V4DF:
26722 case V4SI_FTYPE_V2DF:
26723 case V4HI_FTYPE_V4HI:
26724 case V4DF_FTYPE_V4DF:
26725 case V4DF_FTYPE_V4SI:
26726 case V4DF_FTYPE_V4SF:
26727 case V4DF_FTYPE_V2DF:
26728 case V4SF_FTYPE_V4SF:
26729 case V4SF_FTYPE_V4SI:
26730 case V4SF_FTYPE_V8SF:
26731 case V4SF_FTYPE_V4DF:
26732 case V4SF_FTYPE_V8HI:
26733 case V4SF_FTYPE_V2DF:
26734 case V2DI_FTYPE_V2DI:
26735 case V2DI_FTYPE_V16QI:
26736 case V2DI_FTYPE_V8HI:
26737 case V2DI_FTYPE_V4SI:
26738 case V2DF_FTYPE_V2DF:
26739 case V2DF_FTYPE_V4SI:
26740 case V2DF_FTYPE_V4DF:
26741 case V2DF_FTYPE_V4SF:
26742 case V2DF_FTYPE_V2SI:
26743 case V2SI_FTYPE_V2SI:
26744 case V2SI_FTYPE_V4SF:
26745 case V2SI_FTYPE_V2SF:
26746 case V2SI_FTYPE_V2DF:
26747 case V2SF_FTYPE_V2SF:
26748 case V2SF_FTYPE_V2SI:
26749 nargs = 1;
26750 break;
26751 case V4SF_FTYPE_V4SF_VEC_MERGE:
26752 case V2DF_FTYPE_V2DF_VEC_MERGE:
26753 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26754 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26755 case V16QI_FTYPE_V16QI_V16QI:
26756 case V16QI_FTYPE_V8HI_V8HI:
26757 case V8QI_FTYPE_V8QI_V8QI:
26758 case V8QI_FTYPE_V4HI_V4HI:
26759 case V8HI_FTYPE_V8HI_V8HI:
26760 case V8HI_FTYPE_V16QI_V16QI:
26761 case V8HI_FTYPE_V4SI_V4SI:
26762 case V8SF_FTYPE_V8SF_V8SF:
26763 case V8SF_FTYPE_V8SF_V8SI:
26764 case V4SI_FTYPE_V4SI_V4SI:
26765 case V4SI_FTYPE_V8HI_V8HI:
26766 case V4SI_FTYPE_V4SF_V4SF:
26767 case V4SI_FTYPE_V2DF_V2DF:
26768 case V4HI_FTYPE_V4HI_V4HI:
26769 case V4HI_FTYPE_V8QI_V8QI:
26770 case V4HI_FTYPE_V2SI_V2SI:
26771 case V4DF_FTYPE_V4DF_V4DF:
26772 case V4DF_FTYPE_V4DF_V4DI:
26773 case V4SF_FTYPE_V4SF_V4SF:
26774 case V4SF_FTYPE_V4SF_V4SI:
26775 case V4SF_FTYPE_V4SF_V2SI:
26776 case V4SF_FTYPE_V4SF_V2DF:
26777 case V4SF_FTYPE_V4SF_DI:
26778 case V4SF_FTYPE_V4SF_SI:
26779 case V2DI_FTYPE_V2DI_V2DI:
26780 case V2DI_FTYPE_V16QI_V16QI:
26781 case V2DI_FTYPE_V4SI_V4SI:
26782 case V2DI_FTYPE_V2DI_V16QI:
26783 case V2DI_FTYPE_V2DF_V2DF:
26784 case V2SI_FTYPE_V2SI_V2SI:
26785 case V2SI_FTYPE_V4HI_V4HI:
26786 case V2SI_FTYPE_V2SF_V2SF:
26787 case V2DF_FTYPE_V2DF_V2DF:
26788 case V2DF_FTYPE_V2DF_V4SF:
26789 case V2DF_FTYPE_V2DF_V2DI:
26790 case V2DF_FTYPE_V2DF_DI:
26791 case V2DF_FTYPE_V2DF_SI:
26792 case V2SF_FTYPE_V2SF_V2SF:
26793 case V1DI_FTYPE_V1DI_V1DI:
26794 case V1DI_FTYPE_V8QI_V8QI:
26795 case V1DI_FTYPE_V2SI_V2SI:
26796 if (comparison == UNKNOWN)
26797 return ix86_expand_binop_builtin (icode, exp, target);
26798 nargs = 2;
26799 break;
26800 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26801 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26802 gcc_assert (comparison != UNKNOWN);
26803 nargs = 2;
26804 swap = true;
26805 break;
26806 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26807 case V8HI_FTYPE_V8HI_SI_COUNT:
26808 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26809 case V4SI_FTYPE_V4SI_SI_COUNT:
26810 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26811 case V4HI_FTYPE_V4HI_SI_COUNT:
26812 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26813 case V2DI_FTYPE_V2DI_SI_COUNT:
26814 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26815 case V2SI_FTYPE_V2SI_SI_COUNT:
26816 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26817 case V1DI_FTYPE_V1DI_SI_COUNT:
26818 nargs = 2;
26819 last_arg_count = true;
26820 break;
26821 case UINT64_FTYPE_UINT64_UINT64:
26822 case UINT_FTYPE_UINT_UINT:
26823 case UINT_FTYPE_UINT_USHORT:
26824 case UINT_FTYPE_UINT_UCHAR:
26825 case UINT16_FTYPE_UINT16_INT:
26826 case UINT8_FTYPE_UINT8_INT:
26827 nargs = 2;
26828 break;
26829 case V2DI_FTYPE_V2DI_INT_CONVERT:
26830 nargs = 2;
26831 rmode = V1TImode;
26832 nargs_constant = 1;
26833 break;
26834 case V8HI_FTYPE_V8HI_INT:
26835 case V8HI_FTYPE_V8SF_INT:
26836 case V8HI_FTYPE_V4SF_INT:
26837 case V8SF_FTYPE_V8SF_INT:
26838 case V4SI_FTYPE_V4SI_INT:
26839 case V4SI_FTYPE_V8SI_INT:
26840 case V4HI_FTYPE_V4HI_INT:
26841 case V4DF_FTYPE_V4DF_INT:
26842 case V4SF_FTYPE_V4SF_INT:
26843 case V4SF_FTYPE_V8SF_INT:
26844 case V2DI_FTYPE_V2DI_INT:
26845 case V2DF_FTYPE_V2DF_INT:
26846 case V2DF_FTYPE_V4DF_INT:
26847 nargs = 2;
26848 nargs_constant = 1;
26849 break;
26850 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26851 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26852 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26853 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26854 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26855 nargs = 3;
26856 break;
26857 case V16QI_FTYPE_V16QI_V16QI_INT:
26858 case V8HI_FTYPE_V8HI_V8HI_INT:
26859 case V8SI_FTYPE_V8SI_V8SI_INT:
26860 case V8SI_FTYPE_V8SI_V4SI_INT:
26861 case V8SF_FTYPE_V8SF_V8SF_INT:
26862 case V8SF_FTYPE_V8SF_V4SF_INT:
26863 case V4SI_FTYPE_V4SI_V4SI_INT:
26864 case V4DF_FTYPE_V4DF_V4DF_INT:
26865 case V4DF_FTYPE_V4DF_V2DF_INT:
26866 case V4SF_FTYPE_V4SF_V4SF_INT:
26867 case V2DI_FTYPE_V2DI_V2DI_INT:
26868 case V2DF_FTYPE_V2DF_V2DF_INT:
26869 nargs = 3;
26870 nargs_constant = 1;
26871 break;
26872 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26873 nargs = 3;
26874 rmode = V2DImode;
26875 nargs_constant = 1;
26876 break;
26877 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26878 nargs = 3;
26879 rmode = DImode;
26880 nargs_constant = 1;
26881 break;
26882 case V2DI_FTYPE_V2DI_UINT_UINT:
26883 nargs = 3;
26884 nargs_constant = 2;
26885 break;
26886 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26887 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26888 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26889 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26890 nargs = 4;
26891 nargs_constant = 1;
26892 break;
26893 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26894 nargs = 4;
26895 nargs_constant = 2;
26896 break;
26897 default:
26898 gcc_unreachable ();
26899 }
26900
26901 gcc_assert (nargs <= ARRAY_SIZE (args));
26902
26903 if (comparison != UNKNOWN)
26904 {
26905 gcc_assert (nargs == 2);
26906 return ix86_expand_sse_compare (d, exp, target, swap);
26907 }
26908
26909 if (rmode == VOIDmode || rmode == tmode)
26910 {
26911 if (optimize
26912 || target == 0
26913 || GET_MODE (target) != tmode
26914 || !insn_p->operand[0].predicate (target, tmode))
26915 target = gen_reg_rtx (tmode);
26916 real_target = target;
26917 }
26918 else
26919 {
26920 target = gen_reg_rtx (rmode);
26921 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26922 }
26923
26924 for (i = 0; i < nargs; i++)
26925 {
26926 tree arg = CALL_EXPR_ARG (exp, i);
26927 rtx op = expand_normal (arg);
26928 enum machine_mode mode = insn_p->operand[i + 1].mode;
26929 bool match = insn_p->operand[i + 1].predicate (op, mode);
26930
26931 if (last_arg_count && (i + 1) == nargs)
26932 {
26933 /* SIMD shift insns take either an 8-bit immediate or a
26934 register as the count, but the builtin functions take an
26935 int. If the count doesn't match, put it in a register. */
26936 if (!match)
26937 {
26938 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26939 if (!insn_p->operand[i + 1].predicate (op, mode))
26940 op = copy_to_reg (op);
26941 }
26942 }
26943 else if ((nargs - i) <= nargs_constant)
26944 {
26945 if (!match)
26946 switch (icode)
26947 {
26948 case CODE_FOR_sse4_1_roundpd:
26949 case CODE_FOR_sse4_1_roundps:
26950 case CODE_FOR_sse4_1_roundsd:
26951 case CODE_FOR_sse4_1_roundss:
26952 case CODE_FOR_sse4_1_blendps:
26953 case CODE_FOR_avx_blendpd256:
26954 case CODE_FOR_avx_vpermilv4df:
26955 case CODE_FOR_avx_roundpd256:
26956 case CODE_FOR_avx_roundps256:
26957 error ("the last argument must be a 4-bit immediate");
26958 return const0_rtx;
26959
26960 case CODE_FOR_sse4_1_blendpd:
26961 case CODE_FOR_avx_vpermilv2df:
26962 case CODE_FOR_xop_vpermil2v2df3:
26963 case CODE_FOR_xop_vpermil2v4sf3:
26964 case CODE_FOR_xop_vpermil2v4df3:
26965 case CODE_FOR_xop_vpermil2v8sf3:
26966 error ("the last argument must be a 2-bit immediate");
26967 return const0_rtx;
26968
26969 case CODE_FOR_avx_vextractf128v4df:
26970 case CODE_FOR_avx_vextractf128v8sf:
26971 case CODE_FOR_avx_vextractf128v8si:
26972 case CODE_FOR_avx_vinsertf128v4df:
26973 case CODE_FOR_avx_vinsertf128v8sf:
26974 case CODE_FOR_avx_vinsertf128v8si:
26975 error ("the last argument must be a 1-bit immediate");
26976 return const0_rtx;
26977
26978 case CODE_FOR_avx_vmcmpv2df3:
26979 case CODE_FOR_avx_vmcmpv4sf3:
26980 case CODE_FOR_avx_cmpv2df3:
26981 case CODE_FOR_avx_cmpv4sf3:
26982 case CODE_FOR_avx_cmpv4df3:
26983 case CODE_FOR_avx_cmpv8sf3:
26984 error ("the last argument must be a 5-bit immediate");
26985 return const0_rtx;
26986
26987 default:
26988 switch (nargs_constant)
26989 {
26990 case 2:
26991 if ((nargs - i) == nargs_constant)
26992 {
26993 error ("the next to last argument must be an 8-bit immediate");
26994 break;
26995 }
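/* FALLTHRU */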
26996 case 1:
26997 error ("the last argument must be an 8-bit immediate");
26998 break;
26999 default:
27000 gcc_unreachable ();
27001 }
27002 return const0_rtx;
27003 }
27004 }
27005 else
27006 {
27007 if (VECTOR_MODE_P (mode))
27008 op = safe_vector_operand (op, mode);
27009
27010 /* If we aren't optimizing, only allow one memory operand to
27011 be generated. */
27012 if (memory_operand (op, mode))
27013 num_memory++;
27014
27015 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
27016 {
27017 if (optimize || !match || num_memory > 1)
27018 op = copy_to_mode_reg (mode, op);
27019 }
27020 else
27021 {
27022 op = copy_to_reg (op);
27023 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
27024 }
27025 }
27026
27027 args[i].op = op;
27028 args[i].mode = mode;
27029 }
27030
27031 switch (nargs)
27032 {
27033 case 1:
27034 pat = GEN_FCN (icode) (real_target, args[0].op);
27035 break;
27036 case 2:
27037 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
27038 break;
27039 case 3:
27040 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27041 args[2].op);
27042 break;
27043 case 4:
27044 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27045 args[2].op, args[3].op);
27046 break;
27047 default:
27048 gcc_unreachable ();
27049 }
27050
27051 if (! pat)
27052 return 0;
27053
27054 emit_insn (pat);
27055 return target;
27056 }
27057
27058 /* Subroutine of ix86_expand_builtin to take care of special insns
27059 with variable number of operands. */
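/* "Special" here means builtins whose operands include memory references
   or which have side effects (loads, stores, maskload/maskstore and the
   like), which is why a memory operand slot is tracked explicitly below. */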
27060
27061 static rtx
27062 ix86_expand_special_args_builtin (const struct builtin_description *d,
27063 tree exp, rtx target)
27064 {
27065 tree arg;
27066 rtx pat, op;
27067 unsigned int i, nargs, arg_adjust, memory;
27068 struct
27069 {
27070 rtx op;
27071 enum machine_mode mode;
27072 } args[3];
27073 enum insn_code icode = d->icode;
27074 bool last_arg_constant = false;
27075 const struct insn_data_d *insn_p = &insn_data[icode];
27076 enum machine_mode tmode = insn_p->operand[0].mode;
27077 enum { load, store } klass;
27078
27079 switch ((enum ix86_builtin_func_type) d->flag)
27080 {
27081 case VOID_FTYPE_VOID:
27082 if (icode == CODE_FOR_avx_vzeroupper)
27083 target = GEN_INT (vzeroupper_intrinsic);
27084 emit_insn (GEN_FCN (icode) (target));
27085 return 0;
27086 case VOID_FTYPE_UINT64:
27087 case VOID_FTYPE_UNSIGNED:
27088 nargs = 0;
27089 klass = store;
27090 memory = 0;
27091 break;
27093 case UINT64_FTYPE_VOID:
27094 case UNSIGNED_FTYPE_VOID:
27095 nargs = 0;
27096 klass = load;
27097 memory = 0;
27098 break;
27099 case UINT64_FTYPE_PUNSIGNED:
27100 case V2DI_FTYPE_PV2DI:
27101 case V32QI_FTYPE_PCCHAR:
27102 case V16QI_FTYPE_PCCHAR:
27103 case V8SF_FTYPE_PCV4SF:
27104 case V8SF_FTYPE_PCFLOAT:
27105 case V4SF_FTYPE_PCFLOAT:
27106 case V4DF_FTYPE_PCV2DF:
27107 case V4DF_FTYPE_PCDOUBLE:
27108 case V2DF_FTYPE_PCDOUBLE:
27109 case VOID_FTYPE_PVOID:
27110 nargs = 1;
27111 klass = load;
27112 memory = 0;
27113 break;
27114 case VOID_FTYPE_PV2SF_V4SF:
27115 case VOID_FTYPE_PV4DI_V4DI:
27116 case VOID_FTYPE_PV2DI_V2DI:
27117 case VOID_FTYPE_PCHAR_V32QI:
27118 case VOID_FTYPE_PCHAR_V16QI:
27119 case VOID_FTYPE_PFLOAT_V8SF:
27120 case VOID_FTYPE_PFLOAT_V4SF:
27121 case VOID_FTYPE_PDOUBLE_V4DF:
27122 case VOID_FTYPE_PDOUBLE_V2DF:
27123 case VOID_FTYPE_PULONGLONG_ULONGLONG:
27124 case VOID_FTYPE_PINT_INT:
27125 nargs = 1;
27126 klass = store;
27127 /* Reserve memory operand for target. */
27128 memory = ARRAY_SIZE (args);
27129 break;
27130 case V4SF_FTYPE_V4SF_PCV2SF:
27131 case V2DF_FTYPE_V2DF_PCDOUBLE:
27132 nargs = 2;
27133 klass = load;
27134 memory = 1;
27135 break;
27136 case V8SF_FTYPE_PCV8SF_V8SI:
27137 case V4DF_FTYPE_PCV4DF_V4DI:
27138 case V4SF_FTYPE_PCV4SF_V4SI:
27139 case V2DF_FTYPE_PCV2DF_V2DI:
27140 nargs = 2;
27141 klass = load;
27142 memory = 0;
27143 break;
27144 case VOID_FTYPE_PV8SF_V8SI_V8SF:
27145 case VOID_FTYPE_PV4DF_V4DI_V4DF:
27146 case VOID_FTYPE_PV4SF_V4SI_V4SF:
27147 case VOID_FTYPE_PV2DF_V2DI_V2DF:
27148 nargs = 2;
27149 klass = store;
27150 /* Reserve memory operand for target. */
27151 memory = ARRAY_SIZE (args);
27152 break;
27153 case VOID_FTYPE_UINT_UINT_UINT:
27154 case VOID_FTYPE_UINT64_UINT_UINT:
27155 case UCHAR_FTYPE_UINT_UINT_UINT:
27156 case UCHAR_FTYPE_UINT64_UINT_UINT:
27157 nargs = 3;
27158 klass = load;
27159 memory = ARRAY_SIZE (args);
27160 last_arg_constant = true;
27161 break;
27162 default:
27163 gcc_unreachable ();
27164 }
27165
27166 gcc_assert (nargs <= ARRAY_SIZE (args));
27167
27168 if (klass == store)
27169 {
27170 arg = CALL_EXPR_ARG (exp, 0);
27171 op = expand_normal (arg);
27172 gcc_assert (target == 0);
27173 if (memory)
27174 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
27175 else
27176 target = force_reg (tmode, op);
27177 arg_adjust = 1;
27178 }
27179 else
27180 {
27181 arg_adjust = 0;
27182 if (optimize
27183 || target == 0
27184 || GET_MODE (target) != tmode
27185 || !insn_p->operand[0].predicate (target, tmode))
27186 target = gen_reg_rtx (tmode);
27187 }
27188
27189 for (i = 0; i < nargs; i++)
27190 {
27191 enum machine_mode mode = insn_p->operand[i + 1].mode;
27192 bool match;
27193
27194 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
27195 op = expand_normal (arg);
27196 match = insn_p->operand[i + 1].predicate (op, mode);
27197
27198 if (last_arg_constant && (i + 1) == nargs)
27199 {
27200 if (!match)
27201 {
27202 if (icode == CODE_FOR_lwp_lwpvalsi3
27203 || icode == CODE_FOR_lwp_lwpinssi3
27204 || icode == CODE_FOR_lwp_lwpvaldi3
27205 || icode == CODE_FOR_lwp_lwpinsdi3)
27206 error ("the last argument must be a 32-bit immediate");
27207 else
27208 error ("the last argument must be an 8-bit immediate");
27209 return const0_rtx;
27210 }
27211 }
27212 else
27213 {
27214 if (i == memory)
27215 {
27216 /* This must be the memory operand. */
27217 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
27218 gcc_assert (GET_MODE (op) == mode
27219 || GET_MODE (op) == VOIDmode);
27220 }
27221 else
27222 {
27223 /* This must be a register. */
27224 if (VECTOR_MODE_P (mode))
27225 op = safe_vector_operand (op, mode);
27226
27227 gcc_assert (GET_MODE (op) == mode
27228 || GET_MODE (op) == VOIDmode);
27229 op = copy_to_mode_reg (mode, op);
27230 }
27231 }
27232
27233 args[i].op = op;
27234 args[i].mode = mode;
27235 }
27236
27237 switch (nargs)
27238 {
27239 case 0:
27240 pat = GEN_FCN (icode) (target);
27241 break;
27242 case 1:
27243 pat = GEN_FCN (icode) (target, args[0].op);
27244 break;
27245 case 2:
27246 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27247 break;
27248 case 3:
27249 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27250 break;
27251 default:
27252 gcc_unreachable ();
27253 }
27254
27255 if (! pat)
27256 return 0;
27257 emit_insn (pat);
27258 return klass == store ? 0 : target;
27259 }
27260
27261 /* Return the integer constant in ARG. Constrain it to be in the range
27262 of the subparts of VEC_TYPE; issue an error if not. */
27263
27264 static int
27265 get_element_number (tree vec_type, tree arg)
27266 {
27267 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
27268
27269 if (!host_integerp (arg, 1)
27270 || (elt = tree_low_cst (arg, 1), elt > max))
27271 {
27272 error ("selector must be an integer constant in the range 0..%wi", max);
27273 return 0;
27274 }
27275
27276 return elt;
27277 }
27278
27279 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27280 ix86_expand_vector_init. We DO have language-level syntax for this, in
27281 the form of (type){ init-list }. Except that since we can't place emms
27282 instructions from inside the compiler, we can't allow the use of MMX
27283 registers unless the user explicitly asks for it. So we do *not* define
27284 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
27285 we have builtins invoked by mmintrin.h that give us license to emit
27286 these sorts of instructions. */
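/* Illustrative example (an assumption, not taken from the original
   sources): _mm_set_pi32 in mmintrin.h calls __builtin_ia32_vec_init_v2si,
   which lands here and is expanded through ix86_expand_vector_init. */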
27287
27288 static rtx
27289 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
27290 {
27291 enum machine_mode tmode = TYPE_MODE (type);
27292 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
27293 int i, n_elt = GET_MODE_NUNITS (tmode);
27294 rtvec v = rtvec_alloc (n_elt);
27295
27296 gcc_assert (VECTOR_MODE_P (tmode));
27297 gcc_assert (call_expr_nargs (exp) == n_elt);
27298
27299 for (i = 0; i < n_elt; ++i)
27300 {
27301 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
27302 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
27303 }
27304
27305 if (!target || !register_operand (target, tmode))
27306 target = gen_reg_rtx (tmode);
27307
27308 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
27309 return target;
27310 }
27311
27312 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27313 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
27314 had a language-level syntax for referencing vector elements. */
27315
27316 static rtx
27317 ix86_expand_vec_ext_builtin (tree exp, rtx target)
27318 {
27319 enum machine_mode tmode, mode0;
27320 tree arg0, arg1;
27321 int elt;
27322 rtx op0;
27323
27324 arg0 = CALL_EXPR_ARG (exp, 0);
27325 arg1 = CALL_EXPR_ARG (exp, 1);
27326
27327 op0 = expand_normal (arg0);
27328 elt = get_element_number (TREE_TYPE (arg0), arg1);
27329
27330 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27331 mode0 = TYPE_MODE (TREE_TYPE (arg0));
27332 gcc_assert (VECTOR_MODE_P (mode0));
27333
27334 op0 = force_reg (mode0, op0);
27335
27336 if (optimize || !target || !register_operand (target, tmode))
27337 target = gen_reg_rtx (tmode);
27338
27339 ix86_expand_vector_extract (true, target, op0, elt);
27340
27341 return target;
27342 }
27343
27344 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27345 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
27346 a language-level syntax for referencing vector elements. */
27347
27348 static rtx
27349 ix86_expand_vec_set_builtin (tree exp)
27350 {
27351 enum machine_mode tmode, mode1;
27352 tree arg0, arg1, arg2;
27353 int elt;
27354 rtx op0, op1, target;
27355
27356 arg0 = CALL_EXPR_ARG (exp, 0);
27357 arg1 = CALL_EXPR_ARG (exp, 1);
27358 arg2 = CALL_EXPR_ARG (exp, 2);
27359
27360 tmode = TYPE_MODE (TREE_TYPE (arg0));
27361 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27362 gcc_assert (VECTOR_MODE_P (tmode));
27363
27364 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
27365 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
27366 elt = get_element_number (TREE_TYPE (arg0), arg2);
27367
27368 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
27369 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
27370
27371 op0 = force_reg (tmode, op0);
27372 op1 = force_reg (mode1, op1);
27373
27374 /* OP0 is the source of these builtin functions and shouldn't be
27375 modified. Create a copy, use it and return it as target. */
27376 target = gen_reg_rtx (tmode);
27377 emit_move_insn (target, op0);
27378 ix86_expand_vector_set (true, target, op1, elt);
27379
27380 return target;
27381 }
27382
27383 /* Expand an expression EXP that calls a built-in function,
27384 with result going to TARGET if that's convenient
27385 (and in mode MODE if that's convenient).
27386 SUBTARGET may be used as the target for computing one of EXP's operands.
27387 IGNORE is nonzero if the value is to be ignored. */
27388
27389 static rtx
27390 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
27391 enum machine_mode mode ATTRIBUTE_UNUSED,
27392 int ignore ATTRIBUTE_UNUSED)
27393 {
27394 const struct builtin_description *d;
27395 size_t i;
27396 enum insn_code icode;
27397 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
27398 tree arg0, arg1, arg2;
27399 rtx op0, op1, op2, pat;
27400 enum machine_mode mode0, mode1, mode2;
27401 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
27402
27403 /* Determine whether the builtin function is available under the current ISA.
27404 Originally the builtin was not created if it wasn't applicable to the
27405 current ISA based on the command line switches. With function specific
27406 options, we need to check in the context of the function making the call
27407 whether it is supported. */
27408 if (ix86_builtins_isa[fcode].isa
27409 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
27410 {
27411 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
27412 NULL, NULL, false);
27413
27414 if (!opts)
27415 error ("%qE needs unknown isa option", fndecl);
27416 else
27417 {
27418 gcc_assert (opts != NULL);
27419 error ("%qE needs isa option %s", fndecl, opts);
27420 free (opts);
27421 }
27422 return const0_rtx;
27423 }
27424
27425 switch (fcode)
27426 {
27427 case IX86_BUILTIN_MASKMOVQ:
27428 case IX86_BUILTIN_MASKMOVDQU:
27429 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27430 ? CODE_FOR_mmx_maskmovq
27431 : CODE_FOR_sse2_maskmovdqu);
27432 /* Note the arg order is different from the operand order. */
27433 arg1 = CALL_EXPR_ARG (exp, 0);
27434 arg2 = CALL_EXPR_ARG (exp, 1);
27435 arg0 = CALL_EXPR_ARG (exp, 2);
27436 op0 = expand_normal (arg0);
27437 op1 = expand_normal (arg1);
27438 op2 = expand_normal (arg2);
27439 mode0 = insn_data[icode].operand[0].mode;
27440 mode1 = insn_data[icode].operand[1].mode;
27441 mode2 = insn_data[icode].operand[2].mode;
27442
27443 op0 = force_reg (Pmode, op0);
27444 op0 = gen_rtx_MEM (mode1, op0);
27445
27446 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27447 op0 = copy_to_mode_reg (mode0, op0);
27448 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27449 op1 = copy_to_mode_reg (mode1, op1);
27450 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27451 op2 = copy_to_mode_reg (mode2, op2);
27452 pat = GEN_FCN (icode) (op0, op1, op2);
27453 if (! pat)
27454 return 0;
27455 emit_insn (pat);
27456 return 0;
27457
27458 case IX86_BUILTIN_LDMXCSR:
27459 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27460 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27461 emit_move_insn (target, op0);
27462 emit_insn (gen_sse_ldmxcsr (target));
27463 return 0;
27464
27465 case IX86_BUILTIN_STMXCSR:
27466 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27467 emit_insn (gen_sse_stmxcsr (target));
27468 return copy_to_mode_reg (SImode, target);
27469
27470 case IX86_BUILTIN_CLFLUSH:
27471 arg0 = CALL_EXPR_ARG (exp, 0);
27472 op0 = expand_normal (arg0);
27473 icode = CODE_FOR_sse2_clflush;
27474 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27475 op0 = copy_to_mode_reg (Pmode, op0);
27476
27477 emit_insn (gen_sse2_clflush (op0));
27478 return 0;
27479
27480 case IX86_BUILTIN_MONITOR:
27481 arg0 = CALL_EXPR_ARG (exp, 0);
27482 arg1 = CALL_EXPR_ARG (exp, 1);
27483 arg2 = CALL_EXPR_ARG (exp, 2);
27484 op0 = expand_normal (arg0);
27485 op1 = expand_normal (arg1);
27486 op2 = expand_normal (arg2);
27487 if (!REG_P (op0))
27488 op0 = copy_to_mode_reg (Pmode, op0);
27489 if (!REG_P (op1))
27490 op1 = copy_to_mode_reg (SImode, op1);
27491 if (!REG_P (op2))
27492 op2 = copy_to_mode_reg (SImode, op2);
27493 emit_insn (ix86_gen_monitor (op0, op1, op2));
27494 return 0;
27495
27496 case IX86_BUILTIN_MWAIT:
27497 arg0 = CALL_EXPR_ARG (exp, 0);
27498 arg1 = CALL_EXPR_ARG (exp, 1);
27499 op0 = expand_normal (arg0);
27500 op1 = expand_normal (arg1);
27501 if (!REG_P (op0))
27502 op0 = copy_to_mode_reg (SImode, op0);
27503 if (!REG_P (op1))
27504 op1 = copy_to_mode_reg (SImode, op1);
27505 emit_insn (gen_sse3_mwait (op0, op1));
27506 return 0;
27507
27508 case IX86_BUILTIN_VEC_INIT_V2SI:
27509 case IX86_BUILTIN_VEC_INIT_V4HI:
27510 case IX86_BUILTIN_VEC_INIT_V8QI:
27511 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27512
27513 case IX86_BUILTIN_VEC_EXT_V2DF:
27514 case IX86_BUILTIN_VEC_EXT_V2DI:
27515 case IX86_BUILTIN_VEC_EXT_V4SF:
27516 case IX86_BUILTIN_VEC_EXT_V4SI:
27517 case IX86_BUILTIN_VEC_EXT_V8HI:
27518 case IX86_BUILTIN_VEC_EXT_V2SI:
27519 case IX86_BUILTIN_VEC_EXT_V4HI:
27520 case IX86_BUILTIN_VEC_EXT_V16QI:
27521 return ix86_expand_vec_ext_builtin (exp, target);
27522
27523 case IX86_BUILTIN_VEC_SET_V2DI:
27524 case IX86_BUILTIN_VEC_SET_V4SF:
27525 case IX86_BUILTIN_VEC_SET_V4SI:
27526 case IX86_BUILTIN_VEC_SET_V8HI:
27527 case IX86_BUILTIN_VEC_SET_V4HI:
27528 case IX86_BUILTIN_VEC_SET_V16QI:
27529 return ix86_expand_vec_set_builtin (exp);
27530
27531 case IX86_BUILTIN_VEC_PERM_V2DF:
27532 case IX86_BUILTIN_VEC_PERM_V4SF:
27533 case IX86_BUILTIN_VEC_PERM_V2DI:
27534 case IX86_BUILTIN_VEC_PERM_V4SI:
27535 case IX86_BUILTIN_VEC_PERM_V8HI:
27536 case IX86_BUILTIN_VEC_PERM_V16QI:
27537 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27538 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27539 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27540 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27541 case IX86_BUILTIN_VEC_PERM_V4DF:
27542 case IX86_BUILTIN_VEC_PERM_V8SF:
27543 return ix86_expand_vec_perm_builtin (exp);
27544
27545 case IX86_BUILTIN_INFQ:
27546 case IX86_BUILTIN_HUGE_VALQ:
27547 {
27548 REAL_VALUE_TYPE inf;
27549 rtx tmp;
27550
27551 real_inf (&inf);
27552 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27553
27554 tmp = validize_mem (force_const_mem (mode, tmp));
27555
27556 if (target == 0)
27557 target = gen_reg_rtx (mode);
27558
27559 emit_move_insn (target, tmp);
27560 return target;
27561 }
27562
27563 case IX86_BUILTIN_LLWPCB:
27564 arg0 = CALL_EXPR_ARG (exp, 0);
27565 op0 = expand_normal (arg0);
27566 icode = CODE_FOR_lwp_llwpcb;
27567 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27568 op0 = copy_to_mode_reg (Pmode, op0);
27569 emit_insn (gen_lwp_llwpcb (op0));
27570 return 0;
27571
27572 case IX86_BUILTIN_SLWPCB:
27573 icode = CODE_FOR_lwp_slwpcb;
27574 if (!target
27575 || !insn_data[icode].operand[0].predicate (target, Pmode))
27576 target = gen_reg_rtx (Pmode);
27577 emit_insn (gen_lwp_slwpcb (target));
27578 return target;
27579
27580 case IX86_BUILTIN_BEXTRI32:
27581 case IX86_BUILTIN_BEXTRI64:
27582 arg0 = CALL_EXPR_ARG (exp, 0);
27583 arg1 = CALL_EXPR_ARG (exp, 1);
27584 op0 = expand_normal (arg0);
27585 op1 = expand_normal (arg1);
27586 icode = (fcode == IX86_BUILTIN_BEXTRI32
27587 ? CODE_FOR_tbm_bextri_si
27588 : CODE_FOR_tbm_bextri_di);
27589 if (!CONST_INT_P (op1))
27590 {
27591 error ("last argument must be an immediate");
27592 return const0_rtx;
27593 }
27594 else
27595 {
27596 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27597 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27598 op1 = GEN_INT (length);
27599 op2 = GEN_INT (lsb_index);
27600 pat = GEN_FCN (icode) (target, op0, op1, op2);
27601 if (pat)
27602 emit_insn (pat);
27603 return target;
27604 }
27605
27606 case IX86_BUILTIN_RDRAND16_STEP:
27607 icode = CODE_FOR_rdrandhi_1;
27608 mode0 = HImode;
27609 goto rdrand_step;
27610
27611 case IX86_BUILTIN_RDRAND32_STEP:
27612 icode = CODE_FOR_rdrandsi_1;
27613 mode0 = SImode;
27614 goto rdrand_step;
27615
27616 case IX86_BUILTIN_RDRAND64_STEP:
27617 icode = CODE_FOR_rdranddi_1;
27618 mode0 = DImode;
27619
27620 rdrand_step:
27621 op0 = gen_reg_rtx (mode0);
27622 emit_insn (GEN_FCN (icode) (op0));
27623
27624 op1 = gen_reg_rtx (SImode);
27625 emit_move_insn (op1, CONST1_RTX (SImode));
27626
27627 /* Emit SImode conditional move. */
27628 if (mode0 == HImode)
27629 {
27630 op2 = gen_reg_rtx (SImode);
27631 emit_insn (gen_zero_extendhisi2 (op2, op0));
27632 }
27633 else if (mode0 == SImode)
27634 op2 = op0;
27635 else
27636 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27637
27638 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27639 const0_rtx);
27640 emit_insn (gen_rtx_SET (VOIDmode, op1,
27641 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
27642 emit_move_insn (target, op1);
27643
27644 arg0 = CALL_EXPR_ARG (exp, 0);
27645 op1 = expand_normal (arg0);
27646 if (!address_operand (op1, VOIDmode))
27647 op1 = copy_addr_to_reg (op1);
27648 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
27649 return target;
27650
27651 default:
27652 break;
27653 }
27654
27655 for (i = 0, d = bdesc_special_args;
27656 i < ARRAY_SIZE (bdesc_special_args);
27657 i++, d++)
27658 if (d->code == fcode)
27659 return ix86_expand_special_args_builtin (d, exp, target);
27660
27661 for (i = 0, d = bdesc_args;
27662 i < ARRAY_SIZE (bdesc_args);
27663 i++, d++)
27664 if (d->code == fcode)
27665 switch (fcode)
27666 {
27667 case IX86_BUILTIN_FABSQ:
27668 case IX86_BUILTIN_COPYSIGNQ:
27669 if (!TARGET_SSE2)
27670 /* Emit a normal call if SSE2 isn't available. */
27671 return expand_call (exp, target, ignore);
27672 default:
27673 return ix86_expand_args_builtin (d, exp, target);
27674 }
27675
27676 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27677 if (d->code == fcode)
27678 return ix86_expand_sse_comi (d, exp, target);
27679
27680 for (i = 0, d = bdesc_pcmpestr;
27681 i < ARRAY_SIZE (bdesc_pcmpestr);
27682 i++, d++)
27683 if (d->code == fcode)
27684 return ix86_expand_sse_pcmpestr (d, exp, target);
27685
27686 for (i = 0, d = bdesc_pcmpistr;
27687 i < ARRAY_SIZE (bdesc_pcmpistr);
27688 i++, d++)
27689 if (d->code == fcode)
27690 return ix86_expand_sse_pcmpistr (d, exp, target);
27691
27692 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27693 if (d->code == fcode)
27694 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27695 (enum ix86_builtin_func_type)
27696 d->flag, d->comparison);
27697
27698 gcc_unreachable ();
27699 }
27700
27701 /* Returns a function decl for a vectorized version of the builtin function
27702 FNDECL, with result vector type TYPE_OUT and argument vector type TYPE_IN,
27703 or NULL_TREE if it is not available. */
27704
27705 static tree
27706 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27707 tree type_in)
27708 {
27709 enum machine_mode in_mode, out_mode;
27710 int in_n, out_n;
27711 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27712
27713 if (TREE_CODE (type_out) != VECTOR_TYPE
27714 || TREE_CODE (type_in) != VECTOR_TYPE
27715 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27716 return NULL_TREE;
27717
27718 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27719 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27720 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27721 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27722
27723 switch (fn)
27724 {
27725 case BUILT_IN_SQRT:
27726 if (out_mode == DFmode && in_mode == DFmode)
27727 {
27728 if (out_n == 2 && in_n == 2)
27729 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27730 else if (out_n == 4 && in_n == 4)
27731 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27732 }
27733 break;
27734
27735 case BUILT_IN_SQRTF:
27736 if (out_mode == SFmode && in_mode == SFmode)
27737 {
27738 if (out_n == 4 && in_n == 4)
27739 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27740 else if (out_n == 8 && in_n == 8)
27741 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27742 }
27743 break;
27744
27745 case BUILT_IN_LRINT:
27746 if (out_mode == SImode && out_n == 4
27747 && in_mode == DFmode && in_n == 2)
27748 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27749 break;
27750
27751 case BUILT_IN_LRINTF:
27752 if (out_mode == SImode && in_mode == SFmode)
27753 {
27754 if (out_n == 4 && in_n == 4)
27755 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27756 else if (out_n == 8 && in_n == 8)
27757 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27758 }
27759 break;
27760
27761 case BUILT_IN_COPYSIGN:
27762 if (out_mode == DFmode && in_mode == DFmode)
27763 {
27764 if (out_n == 2 && in_n == 2)
27765 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27766 else if (out_n == 4 && in_n == 4)
27767 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27768 }
27769 break;
27770
27771 case BUILT_IN_COPYSIGNF:
27772 if (out_mode == SFmode && in_mode == SFmode)
27773 {
27774 if (out_n == 4 && in_n == 4)
27775 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27776 else if (out_n == 8 && in_n == 8)
27777 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27778 }
27779 break;
27780
27781 case BUILT_IN_FLOOR:
27782 /* The round insn does not trap on denormals. */
27783 if (flag_trapping_math || !TARGET_ROUND)
27784 break;
27785
27786 if (out_mode == DFmode && in_mode == DFmode)
27787 {
27788 if (out_n == 2 && in_n == 2)
27789 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27790 else if (out_n == 4 && in_n == 4)
27791 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27792 }
27793 break;
27794
27795 case BUILT_IN_FLOORF:
27796 /* The round insn does not trap on denormals. */
27797 if (flag_trapping_math || !TARGET_ROUND)
27798 break;
27799
27800 if (out_mode == SFmode && in_mode == SFmode)
27801 {
27802 if (out_n == 4 && in_n == 4)
27803 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27804 else if (out_n == 8 && in_n == 8)
27805 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27806 }
27807 break;
27808
27809 case BUILT_IN_CEIL:
27810 /* The round insn does not trap on denormals. */
27811 if (flag_trapping_math || !TARGET_ROUND)
27812 break;
27813
27814 if (out_mode == DFmode && in_mode == DFmode)
27815 {
27816 if (out_n == 2 && in_n == 2)
27817 return ix86_builtins[IX86_BUILTIN_CEILPD];
27818 else if (out_n == 4 && in_n == 4)
27819 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27820 }
27821 break;
27822
27823 case BUILT_IN_CEILF:
27824 /* The round insn does not trap on denormals. */
27825 if (flag_trapping_math || !TARGET_ROUND)
27826 break;
27827
27828 if (out_mode == SFmode && in_mode == SFmode)
27829 {
27830 if (out_n == 4 && in_n == 4)
27831 return ix86_builtins[IX86_BUILTIN_CEILPS];
27832 else if (out_n == 8 && in_n == 8)
27833 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27834 }
27835 break;
27836
27837 case BUILT_IN_TRUNC:
27838 /* The round insn does not trap on denormals. */
27839 if (flag_trapping_math || !TARGET_ROUND)
27840 break;
27841
27842 if (out_mode == DFmode && in_mode == DFmode)
27843 {
27844 if (out_n == 2 && in_n == 2)
27845 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27846 else if (out_n == 4 && in_n == 4)
27847 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27848 }
27849 break;
27850
27851 case BUILT_IN_TRUNCF:
27852 /* The round insn does not trap on denormals. */
27853 if (flag_trapping_math || !TARGET_ROUND)
27854 break;
27855
27856 if (out_mode == SFmode && in_mode == SFmode)
27857 {
27858 if (out_n == 4 && in_n == 4)
27859 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27860 else if (out_n == 8 && in_n == 8)
27861 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27862 }
27863 break;
27864
27865 case BUILT_IN_RINT:
27866 /* The round insn does not trap on denormals. */
27867 if (flag_trapping_math || !TARGET_ROUND)
27868 break;
27869
27870 if (out_mode == DFmode && in_mode == DFmode)
27871 {
27872 if (out_n == 2 && in_n == 2)
27873 return ix86_builtins[IX86_BUILTIN_RINTPD];
27874 else if (out_n == 4 && in_n == 4)
27875 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27876 }
27877 break;
27878
27879 case BUILT_IN_RINTF:
27880 /* The round insn does not trap on denormals. */
27881 if (flag_trapping_math || !TARGET_ROUND)
27882 break;
27883
27884 if (out_mode == SFmode && in_mode == SFmode)
27885 {
27886 if (out_n == 4 && in_n == 4)
27887 return ix86_builtins[IX86_BUILTIN_RINTPS];
27888 else if (out_n == 8 && in_n == 8)
27889 return ix86_builtins[IX86_BUILTIN_RINTPS256];
27890 }
27891 break;
27892
27893 case BUILT_IN_FMA:
27894 if (out_mode == DFmode && in_mode == DFmode)
27895 {
27896 if (out_n == 2 && in_n == 2)
27897 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27898 if (out_n == 4 && in_n == 4)
27899 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27900 }
27901 break;
27902
27903 case BUILT_IN_FMAF:
27904 if (out_mode == SFmode && in_mode == SFmode)
27905 {
27906 if (out_n == 4 && in_n == 4)
27907 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27908 if (out_n == 8 && in_n == 8)
27909 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27910 }
27911 break;
27912
27913 default:
27914 break;
27915 }
27916
27917 /* Dispatch to a handler for a vectorization library. */
27918 if (ix86_veclib_handler)
27919 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27920 type_in);
27921
27922 return NULL_TREE;
27923 }
27924
27925 /* Handler for an SVML-style interface to
27926 a library with vectorized intrinsics. */
27927
27928 static tree
27929 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27930 {
27931 char name[20];
27932 tree fntype, new_fndecl, args;
27933 unsigned arity;
27934 const char *bname;
27935 enum machine_mode el_mode, in_mode;
27936 int n, in_n;
27937
27938 /* The SVML library is suitable for unsafe math only. */
27939 if (!flag_unsafe_math_optimizations)
27940 return NULL_TREE;
27941
27942 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27943 n = TYPE_VECTOR_SUBPARTS (type_out);
27944 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27945 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27946 if (el_mode != in_mode
27947 || n != in_n)
27948 return NULL_TREE;
27949
27950 switch (fn)
27951 {
27952 case BUILT_IN_EXP:
27953 case BUILT_IN_LOG:
27954 case BUILT_IN_LOG10:
27955 case BUILT_IN_POW:
27956 case BUILT_IN_TANH:
27957 case BUILT_IN_TAN:
27958 case BUILT_IN_ATAN:
27959 case BUILT_IN_ATAN2:
27960 case BUILT_IN_ATANH:
27961 case BUILT_IN_CBRT:
27962 case BUILT_IN_SINH:
27963 case BUILT_IN_SIN:
27964 case BUILT_IN_ASINH:
27965 case BUILT_IN_ASIN:
27966 case BUILT_IN_COSH:
27967 case BUILT_IN_COS:
27968 case BUILT_IN_ACOSH:
27969 case BUILT_IN_ACOS:
27970 if (el_mode != DFmode || n != 2)
27971 return NULL_TREE;
27972 break;
27973
27974 case BUILT_IN_EXPF:
27975 case BUILT_IN_LOGF:
27976 case BUILT_IN_LOG10F:
27977 case BUILT_IN_POWF:
27978 case BUILT_IN_TANHF:
27979 case BUILT_IN_TANF:
27980 case BUILT_IN_ATANF:
27981 case BUILT_IN_ATAN2F:
27982 case BUILT_IN_ATANHF:
27983 case BUILT_IN_CBRTF:
27984 case BUILT_IN_SINHF:
27985 case BUILT_IN_SINF:
27986 case BUILT_IN_ASINHF:
27987 case BUILT_IN_ASINF:
27988 case BUILT_IN_COSHF:
27989 case BUILT_IN_COSF:
27990 case BUILT_IN_ACOSHF:
27991 case BUILT_IN_ACOSF:
27992 if (el_mode != SFmode || n != 4)
27993 return NULL_TREE;
27994 break;
27995
27996 default:
27997 return NULL_TREE;
27998 }
27999
28000 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
28001
28002 if (fn == BUILT_IN_LOGF)
28003 strcpy (name, "vmlsLn4");
28004 else if (fn == BUILT_IN_LOG)
28005 strcpy (name, "vmldLn2");
28006 else if (n == 4)
28007 {
28008 sprintf (name, "vmls%s", bname+10);
28009 name[strlen (name)-1] = '4';
28010 }
28011 else
28012 sprintf (name, "vmld%s2", bname+10);
28013
28014 /* Convert to uppercase. */
28015 name[4] &= ~0x20;
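/* Worked example (illustrative, derived from the code above): BUILT_IN_SINF
   with 4 SFmode lanes yields the name "vmlsSin4", while BUILT_IN_SIN with
   2 DFmode lanes yields "vmldSin2". */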
28016
28017 arity = 0;
28018 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28019 args = TREE_CHAIN (args))
28020 arity++;
28021
28022 if (arity == 1)
28023 fntype = build_function_type_list (type_out, type_in, NULL);
28024 else
28025 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28026
28027 /* Build a function declaration for the vectorized function. */
28028 new_fndecl = build_decl (BUILTINS_LOCATION,
28029 FUNCTION_DECL, get_identifier (name), fntype);
28030 TREE_PUBLIC (new_fndecl) = 1;
28031 DECL_EXTERNAL (new_fndecl) = 1;
28032 DECL_IS_NOVOPS (new_fndecl) = 1;
28033 TREE_READONLY (new_fndecl) = 1;
28034
28035 return new_fndecl;
28036 }
28037
28038 /* Handler for an ACML-style interface to
28039 a library with vectorized intrinsics. */
28040
28041 static tree
28042 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
28043 {
28044 char name[20] = "__vr.._";
28045 tree fntype, new_fndecl, args;
28046 unsigned arity;
28047 const char *bname;
28048 enum machine_mode el_mode, in_mode;
28049 int n, in_n;
28050
28051 /* The ACML library is 64-bit only and suitable for unsafe math only,
28052 as it does not correctly support parts of IEEE arithmetic with the
28053 required precision, such as denormals. */
28054 if (!TARGET_64BIT
28055 || !flag_unsafe_math_optimizations)
28056 return NULL_TREE;
28057
28058 el_mode = TYPE_MODE (TREE_TYPE (type_out));
28059 n = TYPE_VECTOR_SUBPARTS (type_out);
28060 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28061 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28062 if (el_mode != in_mode
28063 || n != in_n)
28064 return NULL_TREE;
28065
28066 switch (fn)
28067 {
28068 case BUILT_IN_SIN:
28069 case BUILT_IN_COS:
28070 case BUILT_IN_EXP:
28071 case BUILT_IN_LOG:
28072 case BUILT_IN_LOG2:
28073 case BUILT_IN_LOG10:
28074 name[4] = 'd';
28075 name[5] = '2';
28076 if (el_mode != DFmode
28077 || n != 2)
28078 return NULL_TREE;
28079 break;
28080
28081 case BUILT_IN_SINF:
28082 case BUILT_IN_COSF:
28083 case BUILT_IN_EXPF:
28084 case BUILT_IN_POWF:
28085 case BUILT_IN_LOGF:
28086 case BUILT_IN_LOG2F:
28087 case BUILT_IN_LOG10F:
28088 name[4] = 's';
28089 name[5] = '4';
28090 if (el_mode != SFmode
28091 || n != 4)
28092 return NULL_TREE;
28093 break;
28094
28095 default:
28096 return NULL_TREE;
28097 }
28098
28099 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
28100 sprintf (name + 7, "%s", bname+10);
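/* Worked example (illustrative, derived from the template above):
   BUILT_IN_SIN maps to "__vrd2_sin" and BUILT_IN_SINF to "__vrs4_sinf". */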
28101
28102 arity = 0;
28103 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28104 args = TREE_CHAIN (args))
28105 arity++;
28106
28107 if (arity == 1)
28108 fntype = build_function_type_list (type_out, type_in, NULL);
28109 else
28110 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28111
28112 /* Build a function declaration for the vectorized function. */
28113 new_fndecl = build_decl (BUILTINS_LOCATION,
28114 FUNCTION_DECL, get_identifier (name), fntype);
28115 TREE_PUBLIC (new_fndecl) = 1;
28116 DECL_EXTERNAL (new_fndecl) = 1;
28117 DECL_IS_NOVOPS (new_fndecl) = 1;
28118 TREE_READONLY (new_fndecl) = 1;
28119
28120 return new_fndecl;
28121 }
28122
28123
28124 /* Returns a decl of a function that implements conversion of an integer vector
28125 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
28126 are the types involved when converting according to CODE.
28127 Return NULL_TREE if it is not available. */
28128
28129 static tree
28130 ix86_vectorize_builtin_conversion (unsigned int code,
28131 tree dest_type, tree src_type)
28132 {
28133 if (! TARGET_SSE2)
28134 return NULL_TREE;
28135
28136 switch (code)
28137 {
28138 case FLOAT_EXPR:
28139 switch (TYPE_MODE (src_type))
28140 {
28141 case V4SImode:
28142 switch (TYPE_MODE (dest_type))
28143 {
28144 case V4SFmode:
28145 return (TYPE_UNSIGNED (src_type)
28146 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
28147 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
28148 case V4DFmode:
28149 return (TYPE_UNSIGNED (src_type)
28150 ? NULL_TREE
28151 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
28152 default:
28153 return NULL_TREE;
28154 }
28155 break;
28156 case V8SImode:
28157 switch (TYPE_MODE (dest_type))
28158 {
28159 case V8SFmode:
28160 return (TYPE_UNSIGNED (src_type)
28161 ? NULL_TREE
28162 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
28163 default:
28164 return NULL_TREE;
28165 }
28166 break;
28167 default:
28168 return NULL_TREE;
28169 }
28170
28171 case FIX_TRUNC_EXPR:
28172 switch (TYPE_MODE (dest_type))
28173 {
28174 case V4SImode:
28175 switch (TYPE_MODE (src_type))
28176 {
28177 case V4SFmode:
28178 return (TYPE_UNSIGNED (dest_type)
28179 ? NULL_TREE
28180 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
28181 case V4DFmode:
28182 return (TYPE_UNSIGNED (dest_type)
28183 ? NULL_TREE
28184 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
28185 default:
28186 return NULL_TREE;
28187 }
28188 break;
28189
28190 case V8SImode:
28191 switch (TYPE_MODE (src_type))
28192 {
28193 case V8SFmode:
28194 return (TYPE_UNSIGNED (dest_type)
28195 ? NULL_TREE
28196 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
28197 default:
28198 return NULL_TREE;
28199 }
28200 break;
28201
28202 default:
28203 return NULL_TREE;
28204 }
28205
28206 default:
28207 return NULL_TREE;
28208 }
28209
28210 return NULL_TREE;
28211 }
28212
28213 /* Returns a decl of a target-specific builtin that implements the
28214 reciprocal of the function FN, or NULL_TREE if not available. */
28215
28216 static tree
28217 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
28218 bool sqrt ATTRIBUTE_UNUSED)
28219 {
28220 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
28221 && flag_finite_math_only && !flag_trapping_math
28222 && flag_unsafe_math_optimizations))
28223 return NULL_TREE;
28224
28225 if (md_fn)
28226 /* Machine dependent builtins. */
28227 switch (fn)
28228 {
28229 /* Vectorized version of sqrt to rsqrt conversion. */
28230 case IX86_BUILTIN_SQRTPS_NR:
28231 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
28232
28233 case IX86_BUILTIN_SQRTPS_NR256:
28234 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
28235
28236 default:
28237 return NULL_TREE;
28238 }
28239 else
28240 /* Normal builtins. */
28241 switch (fn)
28242 {
28243 /* Sqrt to rsqrt conversion. */
28244 case BUILT_IN_SQRTF:
28245 return ix86_builtins[IX86_BUILTIN_RSQRTF];
28246
28247 default:
28248 return NULL_TREE;
28249 }
28250 }
28251 \f
28252 /* Helper for avx_vpermilps256_operand et al. This is also used by
28253 the expansion functions to turn the parallel back into a mask.
28254 The return value is 0 for no match and the imm8+1 for a match. */
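/* Worked example (illustrative): in V4SFmode the selector uses two bits per
   element, so (parallel [1 0 3 2]) encodes as imm8 0xb1 and this function
   returns 0xb2. */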
28255
28256 int
28257 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
28258 {
28259 unsigned i, nelt = GET_MODE_NUNITS (mode);
28260 unsigned mask = 0;
28261 unsigned char ipar[8];
28262
28263 if (XVECLEN (par, 0) != (int) nelt)
28264 return 0;
28265
28266 /* Validate that all of the elements are constants, and not totally
28267 out of range. Copy the data into an integral array to make the
28268 subsequent checks easier. */
28269 for (i = 0; i < nelt; ++i)
28270 {
28271 rtx er = XVECEXP (par, 0, i);
28272 unsigned HOST_WIDE_INT ei;
28273
28274 if (!CONST_INT_P (er))
28275 return 0;
28276 ei = INTVAL (er);
28277 if (ei >= nelt)
28278 return 0;
28279 ipar[i] = ei;
28280 }
28281
28282 switch (mode)
28283 {
28284 case V4DFmode:
28285 /* In the 256-bit DFmode case, we can only move elements within
28286 a 128-bit lane. */
28287 for (i = 0; i < 2; ++i)
28288 {
28289 if (ipar[i] >= 2)
28290 return 0;
28291 mask |= ipar[i] << i;
28292 }
28293 for (i = 2; i < 4; ++i)
28294 {
28295 if (ipar[i] < 2)
28296 return 0;
28297 mask |= (ipar[i] - 2) << i;
28298 }
28299 break;
28300
28301 case V8SFmode:
28302 /* In the 256-bit SFmode case, we have full freedom of movement
28303 within the low 128-bit lane, but the high 128-bit lane must
28304 mirror the exact same pattern. */
28305 for (i = 0; i < 4; ++i)
28306 if (ipar[i] + 4 != ipar[i + 4])
28307 return 0;
28308 nelt = 4;
28309 /* FALLTHRU */
28310
28311 case V2DFmode:
28312 case V4SFmode:
28313 /* In the 128-bit case, we've full freedom in the placement of
28314 the elements from the source operand. */
28315 for (i = 0; i < nelt; ++i)
28316 mask |= ipar[i] << (i * (nelt / 2));
28317 break;
28318
28319 default:
28320 gcc_unreachable ();
28321 }
28322
28323 /* Make sure success has a non-zero value by adding one. */
28324 return mask + 1;
28325 }
28326
28327 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
28328 the expansion functions to turn the parallel back into a mask.
28329 The return value is 0 for no match and the imm8+1 for a match. */
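/* Illustrative sketch (derived from the checks below): each half of the
   parallel must select one whole 128-bit lane of the two concatenated
   source operands, and that lane index lands in one nibble of the imm8.
   For V8SFmode,

     (parallel [4 5 6 7 12 13 14 15])  ->  mask = 1 | 3<<4 = 0x31

   i.e. the low destination lane comes from lane 1 (elements 4..7) and the
   high lane from lane 3 (elements 12..15); the function returns 0x32.  */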
28330
28331 int
28332 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
28333 {
28334 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
28335 unsigned mask = 0;
28336 unsigned char ipar[8];
28337
28338 if (XVECLEN (par, 0) != (int) nelt)
28339 return 0;
28340
28341 /* Validate that all of the elements are constants, and not totally
28342 out of range. Copy the data into an integral array to make the
28343 subsequent checks easier. */
28344 for (i = 0; i < nelt; ++i)
28345 {
28346 rtx er = XVECEXP (par, 0, i);
28347 unsigned HOST_WIDE_INT ei;
28348
28349 if (!CONST_INT_P (er))
28350 return 0;
28351 ei = INTVAL (er);
28352 if (ei >= 2 * nelt)
28353 return 0;
28354 ipar[i] = ei;
28355 }
28356
28357 /* Validate that each half of the permute selects consecutive elements. */
28358 for (i = 0; i < nelt2 - 1; ++i)
28359 if (ipar[i] + 1 != ipar[i + 1])
28360 return 0;
28361 for (i = nelt2; i < nelt - 1; ++i)
28362 if (ipar[i] + 1 != ipar[i + 1])
28363 return 0;
28364
28365 /* Reconstruct the mask. */
28366 for (i = 0; i < 2; ++i)
28367 {
28368 unsigned e = ipar[i * nelt2];
28369 if (e % nelt2)
28370 return 0;
28371 e /= nelt2;
28372 mask |= e << (i * 4);
28373 }
28374
28375 /* Make sure success has a non-zero value by adding one. */
28376 return mask + 1;
28377 }
28378 \f
28379
28380 /* Store OPERAND to the memory after reload is completed. This means
28381 that we can't easily use assign_stack_local. */
28382 rtx
28383 ix86_force_to_memory (enum machine_mode mode, rtx operand)
28384 {
28385 rtx result;
28386
28387 gcc_assert (reload_completed);
28388 if (ix86_using_red_zone ())
28389 {
28390 result = gen_rtx_MEM (mode,
28391 gen_rtx_PLUS (Pmode,
28392 stack_pointer_rtx,
28393 GEN_INT (-RED_ZONE_SIZE)));
28394 emit_move_insn (result, operand);
28395 }
28396 else if (TARGET_64BIT)
28397 {
28398 switch (mode)
28399 {
28400 case HImode:
28401 case SImode:
28402 operand = gen_lowpart (DImode, operand);
28403 /* FALLTHRU */
28404 case DImode:
28405 emit_insn (
28406 gen_rtx_SET (VOIDmode,
28407 gen_rtx_MEM (DImode,
28408 gen_rtx_PRE_DEC (DImode,
28409 stack_pointer_rtx)),
28410 operand));
28411 break;
28412 default:
28413 gcc_unreachable ();
28414 }
28415 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28416 }
28417 else
28418 {
28419 switch (mode)
28420 {
28421 case DImode:
28422 {
28423 rtx operands[2];
28424 split_double_mode (mode, &operand, 1, operands, operands + 1);
28425 emit_insn (
28426 gen_rtx_SET (VOIDmode,
28427 gen_rtx_MEM (SImode,
28428 gen_rtx_PRE_DEC (Pmode,
28429 stack_pointer_rtx)),
28430 operands[1]));
28431 emit_insn (
28432 gen_rtx_SET (VOIDmode,
28433 gen_rtx_MEM (SImode,
28434 gen_rtx_PRE_DEC (Pmode,
28435 stack_pointer_rtx)),
28436 operands[0]));
28437 }
28438 break;
28439 case HImode:
28440 /* Store HImodes as SImodes. */
28441 operand = gen_lowpart (SImode, operand);
28442 /* FALLTHRU */
28443 case SImode:
28444 emit_insn (
28445 gen_rtx_SET (VOIDmode,
28446 gen_rtx_MEM (GET_MODE (operand),
28447 gen_rtx_PRE_DEC (SImode,
28448 stack_pointer_rtx)),
28449 operand));
28450 break;
28451 default:
28452 gcc_unreachable ();
28453 }
28454 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28455 }
28456 return result;
28457 }
28458
28459 /* Free operand from the memory. */
28460 void
28461 ix86_free_from_memory (enum machine_mode mode)
28462 {
28463 if (!ix86_using_red_zone ())
28464 {
28465 int size;
28466
28467 if (mode == DImode || TARGET_64BIT)
28468 size = 8;
28469 else
28470 size = 4;
28471 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28472 to a pop or add instruction if registers are available. */
28473 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28474 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28475 GEN_INT (size))));
28476 }
28477 }
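/* A minimal usage sketch of the two helpers above (illustrative only;
   gen_some_pattern stands for a hypothetical insn pattern and is not part
   of this file): they are meant to be paired around a post-reload insn
   that needs a stack temporary, e.g.

     rtx mem = ix86_force_to_memory (SImode, operand);
     emit_insn (gen_some_pattern (target, mem));
     ix86_free_from_memory (SImode);

   ix86_force_to_memory either reuses the red zone or pushes the value,
   and ix86_free_from_memory releases the pushed stack space again.  */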
28478
28479 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28480
28481 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28482 QImode must go into class Q_REGS.
28483 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28484 movdf to do mem-to-mem moves through integer regs. */
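/* For illustration (the outcomes follow from the checks below): loading
   the nonzero constant 1.0 into an SSE class returns NO_REGS, so the
   constant is forced into the constant pool; with the 387 enabled and SSE
   math not in use, the same constant with REGCLASS == FLOAT_REGS stays
   FLOAT_REGS, since fld1 can materialize it, while an arbitrary
   CONST_DOUBLE such as 2.5 still yields NO_REGS.  */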
28485
28486 static reg_class_t
28487 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28488 {
28489 enum machine_mode mode = GET_MODE (x);
28490
28491 /* We're only allowed to return a subclass of CLASS. Many of the
28492 following checks fail for NO_REGS, so eliminate that early. */
28493 if (regclass == NO_REGS)
28494 return NO_REGS;
28495
28496 /* All classes can load zeros. */
28497 if (x == CONST0_RTX (mode))
28498 return regclass;
28499
28500 /* Force constants into memory if we are loading a (nonzero) constant into
28501 an MMX or SSE register. This is because there are no MMX/SSE instructions
28502 to load from a constant. */
28503 if (CONSTANT_P (x)
28504 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28505 return NO_REGS;
28506
28507 /* Prefer SSE regs only, if we can use them for math. */
28508 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28509 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28510
28511 /* Floating-point constants need more complex checks. */
28512 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28513 {
28514 /* General regs can load everything. */
28515 if (reg_class_subset_p (regclass, GENERAL_REGS))
28516 return regclass;
28517
28518 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28519 zero above. We only want to wind up preferring 80387 registers if
28520 we plan on doing computation with them. */
28521 if (TARGET_80387
28522 && standard_80387_constant_p (x))
28523 {
28524 /* Limit class to non-sse. */
28525 if (regclass == FLOAT_SSE_REGS)
28526 return FLOAT_REGS;
28527 if (regclass == FP_TOP_SSE_REGS)
28528 return FP_TOP_REG;
28529 if (regclass == FP_SECOND_SSE_REGS)
28530 return FP_SECOND_REG;
28531 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28532 return regclass;
28533 }
28534
28535 return NO_REGS;
28536 }
28537
28538 /* Generally when we see PLUS here, it's the function invariant
28539 (plus soft-fp const_int). Which can only be computed into general
28540 regs. */
28541 if (GET_CODE (x) == PLUS)
28542 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28543
28544 /* QImode constants are easy to load, but non-constant QImode data
28545 must go into Q_REGS. */
28546 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28547 {
28548 if (reg_class_subset_p (regclass, Q_REGS))
28549 return regclass;
28550 if (reg_class_subset_p (Q_REGS, regclass))
28551 return Q_REGS;
28552 return NO_REGS;
28553 }
28554
28555 return regclass;
28556 }
28557
28558 /* Discourage putting floating-point values in SSE registers unless
28559 SSE math is being used, and likewise for the 387 registers. */
28560 static reg_class_t
28561 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28562 {
28563 enum machine_mode mode = GET_MODE (x);
28564
28565 /* Restrict the output reload class to the register bank that we are doing
28566 math on. If we would like not to return a subset of CLASS, reject this
28567 alternative: if reload cannot do this, it will still use its choice. */
28569 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28570 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28571
28572 if (X87_FLOAT_MODE_P (mode))
28573 {
28574 if (regclass == FP_TOP_SSE_REGS)
28575 return FP_TOP_REG;
28576 else if (regclass == FP_SECOND_SSE_REGS)
28577 return FP_SECOND_REG;
28578 else
28579 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28580 }
28581
28582 return regclass;
28583 }
28584
28585 static reg_class_t
28586 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28587 enum machine_mode mode,
28588 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28589 {
28590 /* QImode spills from non-QI registers require
28591 an intermediate register on 32bit targets. */
28592 if (!TARGET_64BIT
28593 && !in_p && mode == QImode
28594 && (rclass == GENERAL_REGS
28595 || rclass == LEGACY_REGS
28596 || rclass == INDEX_REGS))
28597 {
28598 int regno;
28599
28600 if (REG_P (x))
28601 regno = REGNO (x);
28602 else
28603 regno = -1;
28604
28605 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28606 regno = true_regnum (x);
28607
28608 /* Return Q_REGS if the operand is in memory. */
28609 if (regno == -1)
28610 return Q_REGS;
28611 }
28612
28613 /* This condition handles the corner case where an expression involving
28614 pointers gets vectorized. We're trying to use the address of a
28615 stack slot as a vector initializer.
28616
28617 (set (reg:V2DI 74 [ vect_cst_.2 ])
28618 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28619
28620 Eventually frame gets turned into sp+offset like this:
28621
28622 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28623 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28624 (const_int 392 [0x188]))))
28625
28626 That later gets turned into:
28627
28628 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28629 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28630 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28631
28632 We'll have the following reload recorded:
28633
28634 Reload 0: reload_in (DI) =
28635 (plus:DI (reg/f:DI 7 sp)
28636 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28637 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28638 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28639 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28640 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28641 reload_reg_rtx: (reg:V2DI 22 xmm1)
28642
28643 Which isn't going to work since SSE instructions can't handle scalar
28644 additions. Returning GENERAL_REGS forces the addition into an integer
28645 register, and reload can handle subsequent reloads without problems. */
28646
28647 if (in_p && GET_CODE (x) == PLUS
28648 && SSE_CLASS_P (rclass)
28649 && SCALAR_INT_MODE_P (mode))
28650 return GENERAL_REGS;
28651
28652 return NO_REGS;
28653 }
28654
28655 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28656
28657 static bool
28658 ix86_class_likely_spilled_p (reg_class_t rclass)
28659 {
28660 switch (rclass)
28661 {
28662 case AREG:
28663 case DREG:
28664 case CREG:
28665 case BREG:
28666 case AD_REGS:
28667 case SIREG:
28668 case DIREG:
28669 case SSE_FIRST_REG:
28670 case FP_TOP_REG:
28671 case FP_SECOND_REG:
28672 return true;
28673
28674 default:
28675 break;
28676 }
28677
28678 return false;
28679 }
28680
28681 /* If we are copying between general and FP registers, we need a memory
28682 location. The same is true for SSE and MMX registers.
28683
28684 To optimize register_move_cost performance, allow inline variant.
28685
28686 The macro can't work reliably when one of the CLASSES is a class containing
28687 registers from multiple units (SSE, MMX, integer). We avoid this by never
28688 combining those units in a single alternative in the machine description.
28689 Ensure that this constraint holds to avoid unexpected surprises.
28690
28691 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28692 enforce these sanity checks. */
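/* Two illustrative cases (derived from the tests below): an SImode copy
   between SSE_REGS and GENERAL_REGS needs no memory on an SSE2 target that
   allows inter-unit moves (a plain movd suffices), whereas a TImode copy
   between the same classes always goes through memory because the mode is
   wider than a word; any copy between MMX and non-MMX classes is reported
   as needing memory on purpose, see the comment below.  */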
28693
28694 static inline bool
28695 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28696 enum machine_mode mode, int strict)
28697 {
28698 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28699 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28700 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28701 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28702 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28703 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28704 {
28705 gcc_assert (!strict);
28706 return true;
28707 }
28708
28709 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28710 return true;
28711
28712 /* ??? This is a lie. We do have moves between mmx/general, and between
28713 mmx/sse2. But by saying we need secondary memory we discourage the
28714 register allocator from using the mmx registers unless needed. */
28715 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28716 return true;
28717
28718 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28719 {
28720 /* SSE1 doesn't have any direct moves from other classes. */
28721 if (!TARGET_SSE2)
28722 return true;
28723
28724 /* If the target says that inter-unit moves are more expensive
28725 than moving through memory, then don't generate them. */
28726 if (!TARGET_INTER_UNIT_MOVES)
28727 return true;
28728
28729 /* Between SSE and general, we have moves no larger than word size. */
28730 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28731 return true;
28732 }
28733
28734 return false;
28735 }
28736
28737 bool
28738 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28739 enum machine_mode mode, int strict)
28740 {
28741 return inline_secondary_memory_needed (class1, class2, mode, strict);
28742 }
28743
28744 /* Return true if the registers in CLASS cannot represent the change from
28745 modes FROM to TO. */
28746
28747 bool
28748 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28749 enum reg_class regclass)
28750 {
28751 if (from == to)
28752 return false;
28753
28754 /* x87 registers can't do subreg at all, as all values are reformatted
28755 to extended precision. */
28756 if (MAYBE_FLOAT_CLASS_P (regclass))
28757 return true;
28758
28759 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28760 {
28761 /* Vector registers do not support QI or HImode loads. If we don't
28762 disallow a change to these modes, reload will assume it's ok to
28763 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28764 the vec_dupv4hi pattern. */
28765 if (GET_MODE_SIZE (from) < 4)
28766 return true;
28767
28768 /* Vector registers do not support subreg with nonzero offsets, which
28769 are otherwise valid for integer registers. Since we can't see
28770 whether we have a nonzero offset from here, prohibit all
28771 nonparadoxical subregs changing size. */
28772 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28773 return true;
28774 }
28775
28776 return false;
28777 }
28778
28779 /* Return the cost of moving data of mode M between a
28780 register and memory. A value of 2 is the default; this cost is
28781 relative to those in `REGISTER_MOVE_COST'.
28782
28783 This function is used extensively by register_move_cost, which is used to
28784 build tables at startup, so make it inline in that case.
28785 When IN is 2, return the maximum of the in and out move costs.
28786
28787 If moving between registers and memory is more expensive than
28788 between two registers, you should define this macro to express the
28789 relative cost.
28790
28791 Also model the increased cost of moving QImode registers in non
28792 Q_REGS classes.
28793 */
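/* A worked example of the table lookups below (illustrative): a DFmode
   value in FLOAT_REGS with IN == 2 costs MAX (fp_load[1], fp_store[1]);
   a 16-byte vector in SSE_REGS with IN == 1 costs sse_load[2]; and a
   QImode store from a non-Q_REGS class on a 32-bit target costs
   int_store[0] + 4, reflecting the extra work needed to get at the low
   byte of a non-Q register.  */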
28794 static inline int
28795 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28796 int in)
28797 {
28798 int cost;
28799 if (FLOAT_CLASS_P (regclass))
28800 {
28801 int index;
28802 switch (mode)
28803 {
28804 case SFmode:
28805 index = 0;
28806 break;
28807 case DFmode:
28808 index = 1;
28809 break;
28810 case XFmode:
28811 index = 2;
28812 break;
28813 default:
28814 return 100;
28815 }
28816 if (in == 2)
28817 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28818 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28819 }
28820 if (SSE_CLASS_P (regclass))
28821 {
28822 int index;
28823 switch (GET_MODE_SIZE (mode))
28824 {
28825 case 4:
28826 index = 0;
28827 break;
28828 case 8:
28829 index = 1;
28830 break;
28831 case 16:
28832 index = 2;
28833 break;
28834 default:
28835 return 100;
28836 }
28837 if (in == 2)
28838 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28839 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28840 }
28841 if (MMX_CLASS_P (regclass))
28842 {
28843 int index;
28844 switch (GET_MODE_SIZE (mode))
28845 {
28846 case 4:
28847 index = 0;
28848 break;
28849 case 8:
28850 index = 1;
28851 break;
28852 default:
28853 return 100;
28854 }
28855 if (in == 2)
28856 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28857 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28858 }
28859 switch (GET_MODE_SIZE (mode))
28860 {
28861 case 1:
28862 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28863 {
28864 if (!in)
28865 return ix86_cost->int_store[0];
28866 if (TARGET_PARTIAL_REG_DEPENDENCY
28867 && optimize_function_for_speed_p (cfun))
28868 cost = ix86_cost->movzbl_load;
28869 else
28870 cost = ix86_cost->int_load[0];
28871 if (in == 2)
28872 return MAX (cost, ix86_cost->int_store[0]);
28873 return cost;
28874 }
28875 else
28876 {
28877 if (in == 2)
28878 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28879 if (in)
28880 return ix86_cost->movzbl_load;
28881 else
28882 return ix86_cost->int_store[0] + 4;
28883 }
28884 break;
28885 case 2:
28886 if (in == 2)
28887 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28888 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28889 default:
28890 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
28891 if (mode == TFmode)
28892 mode = XFmode;
28893 if (in == 2)
28894 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28895 else if (in)
28896 cost = ix86_cost->int_load[2];
28897 else
28898 cost = ix86_cost->int_store[2];
28899 return (cost * (((int) GET_MODE_SIZE (mode)
28900 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28901 }
28902 }
28903
28904 static int
28905 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28906 bool in)
28907 {
28908 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28909 }
28910
28911
28912 /* Return the cost of moving data from a register in class CLASS1 to
28913 one in class CLASS2.
28914
28915 It is not required that the cost always equal 2 when FROM is the same as TO;
28916 on some machines it is expensive to move between registers if they are not
28917 general registers. */
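/* For example (illustrative, using the helpers above): an SFmode move
   between SSE_REGS and FLOAT_REGS needs secondary memory, so it is costed
   as 1 plus the in/out memory move cost of each class; on an SSE2 target
   that allows inter-unit moves, an SImode move between SSE_REGS and
   GENERAL_REGS is costed as MAX (8, mmxsse_to_integer).  */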
28918
28919 static int
28920 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28921 reg_class_t class2_i)
28922 {
28923 enum reg_class class1 = (enum reg_class) class1_i;
28924 enum reg_class class2 = (enum reg_class) class2_i;
28925
28926 /* In case we require secondary memory, compute the cost of the store followed
28927 by the load. In order to avoid bad register allocation choices, we need
28928 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28929
28930 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28931 {
28932 int cost = 1;
28933
28934 cost += inline_memory_move_cost (mode, class1, 2);
28935 cost += inline_memory_move_cost (mode, class2, 2);
28936
28937 /* In case of copying from a general purpose register we may emit multiple
28938 stores followed by a single load, causing a memory size mismatch stall.
28939 Count this as an arbitrarily high cost of 20. */
28940 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28941 cost += 20;
28942
28943 /* In the case of FP/MMX moves, the registers actually overlap, and we
28944 have to switch modes in order to treat them differently. */
28945 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28946 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28947 cost += 20;
28948
28949 return cost;
28950 }
28951
28952 /* Moves between the SSE/MMX and integer units are expensive. */
28953 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28954 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28955
28956 /* ??? By keeping the returned value relatively high, we limit the number
28957 of moves between integer and MMX/SSE registers for all targets.
28958 Additionally, a high value prevents problems with x86_modes_tieable_p(),
28959 where integer modes in MMX/SSE registers are not tieable
28960 because of missing QImode and HImode moves to, from or between
28961 MMX/SSE registers. */
28962 return MAX (8, ix86_cost->mmxsse_to_integer);
28963
28964 if (MAYBE_FLOAT_CLASS_P (class1))
28965 return ix86_cost->fp_move;
28966 if (MAYBE_SSE_CLASS_P (class1))
28967 return ix86_cost->sse_move;
28968 if (MAYBE_MMX_CLASS_P (class1))
28969 return ix86_cost->mmx_move;
28970 return 2;
28971 }
28972
28973 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
28974
28975 bool
28976 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28977 {
28978 /* Flags and only flags can only hold CCmode values. */
28979 if (CC_REGNO_P (regno))
28980 return GET_MODE_CLASS (mode) == MODE_CC;
28981 if (GET_MODE_CLASS (mode) == MODE_CC
28982 || GET_MODE_CLASS (mode) == MODE_RANDOM
28983 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28984 return 0;
28985 if (FP_REGNO_P (regno))
28986 return VALID_FP_MODE_P (mode);
28987 if (SSE_REGNO_P (regno))
28988 {
28989 /* We implement the move patterns for all vector modes into and
28990 out of SSE registers, even when no operation instructions
28991 are available. OImode move is available only when AVX is
28992 enabled. */
28993 return ((TARGET_AVX && mode == OImode)
28994 || VALID_AVX256_REG_MODE (mode)
28995 || VALID_SSE_REG_MODE (mode)
28996 || VALID_SSE2_REG_MODE (mode)
28997 || VALID_MMX_REG_MODE (mode)
28998 || VALID_MMX_REG_MODE_3DNOW (mode));
28999 }
29000 if (MMX_REGNO_P (regno))
29001 {
29002 /* We implement the move patterns for 3DNOW modes even in MMX mode,
29003 so if the register is available at all, then we can move data of
29004 the given mode into or out of it. */
29005 return (VALID_MMX_REG_MODE (mode)
29006 || VALID_MMX_REG_MODE_3DNOW (mode));
29007 }
29008
29009 if (mode == QImode)
29010 {
29011 /* Take care for QImode values - they can be in non-QI regs,
29012 but then they do cause partial register stalls. */
29013 if (regno <= BX_REG || TARGET_64BIT)
29014 return 1;
29015 if (!TARGET_PARTIAL_REG_STALL)
29016 return 1;
29017 return reload_in_progress || reload_completed;
29018 }
29019 /* We handle both integers and floats in the general purpose registers. */
29020 else if (VALID_INT_MODE_P (mode))
29021 return 1;
29022 else if (VALID_FP_MODE_P (mode))
29023 return 1;
29024 else if (VALID_DFP_MODE_P (mode))
29025 return 1;
29026 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
29027 on to use that value in smaller contexts, this can easily force a
29028 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
29029 supporting DImode, allow it. */
29030 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
29031 return 1;
29032
29033 return 0;
29034 }
29035
29036 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
29037 tieable integer mode. */
29038
29039 static bool
29040 ix86_tieable_integer_mode_p (enum machine_mode mode)
29041 {
29042 switch (mode)
29043 {
29044 case HImode:
29045 case SImode:
29046 return true;
29047
29048 case QImode:
29049 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
29050
29051 case DImode:
29052 return TARGET_64BIT;
29053
29054 default:
29055 return false;
29056 }
29057 }
29058
29059 /* Return true if MODE1 is accessible in a register that can hold MODE2
29060 without copying. That is, all register classes that can hold MODE2
29061 can also hold MODE1. */
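/* A few concrete outcomes of the rules below (illustrative): SImode and
   HImode tie with each other; SFmode ties with DFmode and with XFmode,
   but not vice versa; and two 16-byte vector modes such as V4SFmode and
   V2DImode tie because both are acceptable to SSE registers.  */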
29062
29063 bool
29064 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
29065 {
29066 if (mode1 == mode2)
29067 return true;
29068
29069 if (ix86_tieable_integer_mode_p (mode1)
29070 && ix86_tieable_integer_mode_p (mode2))
29071 return true;
29072
29073 /* MODE2 being XFmode implies fp stack or general regs, which means we
29074 can tie any smaller floating point modes to it. Note that we do not
29075 tie this with TFmode. */
29076 if (mode2 == XFmode)
29077 return mode1 == SFmode || mode1 == DFmode;
29078
29079 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
29080 that we can tie it with SFmode. */
29081 if (mode2 == DFmode)
29082 return mode1 == SFmode;
29083
29084 /* If MODE2 is only appropriate for an SSE register, then tie with
29085 any other mode acceptable to SSE registers. */
29086 if (GET_MODE_SIZE (mode2) == 16
29087 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
29088 return (GET_MODE_SIZE (mode1) == 16
29089 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
29090
29091 /* If MODE2 is appropriate for an MMX register, then tie
29092 with any other mode acceptable to MMX registers. */
29093 if (GET_MODE_SIZE (mode2) == 8
29094 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
29095 return (GET_MODE_SIZE (mode1) == 8
29096 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
29097
29098 return false;
29099 }
29100
29101 /* Compute a (partial) cost for rtx X. Return true if the complete
29102 cost has been computed, and false if subexpressions should be
29103 scanned. In either case, *TOTAL contains the cost result. */
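/* Two representative outcomes (illustrative): an integer multiply by a
   CONST_INT is costed as mult_init[MODE_INDEX (mode)] plus one mult_bit
   per set bit of the constant, and a PLUS of the form
   (plus (mult reg 4) reg) that fits an address is costed as a single lea
   plus the cost of its operands.  */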
29104
29105 static bool
29106 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
29107 {
29108 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
29109 enum machine_mode mode = GET_MODE (x);
29110 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
29111
29112 switch (code)
29113 {
29114 case CONST_INT:
29115 case CONST:
29116 case LABEL_REF:
29117 case SYMBOL_REF:
29118 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
29119 *total = 3;
29120 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
29121 *total = 2;
29122 else if (flag_pic && SYMBOLIC_CONST (x)
29123 && (!TARGET_64BIT
29124 || (GET_CODE (x) != LABEL_REF
29125 && (GET_CODE (x) != SYMBOL_REF
29126 || !SYMBOL_REF_LOCAL_P (x)))))
29127 *total = 1;
29128 else
29129 *total = 0;
29130 return true;
29131
29132 case CONST_DOUBLE:
29133 if (mode == VOIDmode)
29134 *total = 0;
29135 else
29136 switch (standard_80387_constant_p (x))
29137 {
29138 case 1: /* 0.0 */
29139 *total = 1;
29140 break;
29141 default: /* Other constants */
29142 *total = 2;
29143 break;
29144 case 0:
29145 case -1:
29146 /* Start with (MEM (SYMBOL_REF)), since that's where
29147 it'll probably end up. Add a penalty for size. */
29148 *total = (COSTS_N_INSNS (1)
29149 + (flag_pic != 0 && !TARGET_64BIT)
29150 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
29151 break;
29152 }
29153 return true;
29154
29155 case ZERO_EXTEND:
29156 /* The zero extension is often completely free on x86_64, so make
29157 it as cheap as possible. */
29158 if (TARGET_64BIT && mode == DImode
29159 && GET_MODE (XEXP (x, 0)) == SImode)
29160 *total = 1;
29161 else if (TARGET_ZERO_EXTEND_WITH_AND)
29162 *total = cost->add;
29163 else
29164 *total = cost->movzx;
29165 return false;
29166
29167 case SIGN_EXTEND:
29168 *total = cost->movsx;
29169 return false;
29170
29171 case ASHIFT:
29172 if (CONST_INT_P (XEXP (x, 1))
29173 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
29174 {
29175 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29176 if (value == 1)
29177 {
29178 *total = cost->add;
29179 return false;
29180 }
29181 if ((value == 2 || value == 3)
29182 && cost->lea <= cost->shift_const)
29183 {
29184 *total = cost->lea;
29185 return false;
29186 }
29187 }
29188 /* FALLTHRU */
29189
29190 case ROTATE:
29191 case ASHIFTRT:
29192 case LSHIFTRT:
29193 case ROTATERT:
29194 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
29195 {
29196 if (CONST_INT_P (XEXP (x, 1)))
29197 {
29198 if (INTVAL (XEXP (x, 1)) > 32)
29199 *total = cost->shift_const + COSTS_N_INSNS (2);
29200 else
29201 *total = cost->shift_const * 2;
29202 }
29203 else
29204 {
29205 if (GET_CODE (XEXP (x, 1)) == AND)
29206 *total = cost->shift_var * 2;
29207 else
29208 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
29209 }
29210 }
29211 else
29212 {
29213 if (CONST_INT_P (XEXP (x, 1)))
29214 *total = cost->shift_const;
29215 else
29216 *total = cost->shift_var;
29217 }
29218 return false;
29219
29220 case FMA:
29221 {
29222 rtx sub;
29223
29224 gcc_assert (FLOAT_MODE_P (mode));
29225 gcc_assert (TARGET_FMA || TARGET_FMA4);
29226
29227 /* ??? SSE scalar/vector cost should be used here. */
29228 /* ??? Bald assumption that fma has the same cost as fmul. */
29229 *total = cost->fmul;
29230 *total += rtx_cost (XEXP (x, 1), FMA, speed);
29231
29232 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
29233 sub = XEXP (x, 0);
29234 if (GET_CODE (sub) == NEG)
29235 sub = XEXP (sub, 0);
29236 *total += rtx_cost (sub, FMA, speed);
29237
29238 sub = XEXP (x, 2);
29239 if (GET_CODE (sub) == NEG)
29240 sub = XEXP (sub, 0);
29241 *total += rtx_cost (sub, FMA, speed);
29242 return true;
29243 }
29244
29245 case MULT:
29246 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29247 {
29248 /* ??? SSE scalar cost should be used here. */
29249 *total = cost->fmul;
29250 return false;
29251 }
29252 else if (X87_FLOAT_MODE_P (mode))
29253 {
29254 *total = cost->fmul;
29255 return false;
29256 }
29257 else if (FLOAT_MODE_P (mode))
29258 {
29259 /* ??? SSE vector cost should be used here. */
29260 *total = cost->fmul;
29261 return false;
29262 }
29263 else
29264 {
29265 rtx op0 = XEXP (x, 0);
29266 rtx op1 = XEXP (x, 1);
29267 int nbits;
29268 if (CONST_INT_P (XEXP (x, 1)))
29269 {
29270 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29271 for (nbits = 0; value != 0; value &= value - 1)
29272 nbits++;
29273 }
29274 else
29275 /* This is arbitrary. */
29276 nbits = 7;
29277
29278 /* Compute costs correctly for widening multiplication. */
29279 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
29280 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
29281 == GET_MODE_SIZE (mode))
29282 {
29283 int is_mulwiden = 0;
29284 enum machine_mode inner_mode = GET_MODE (op0);
29285
29286 if (GET_CODE (op0) == GET_CODE (op1))
29287 is_mulwiden = 1, op1 = XEXP (op1, 0);
29288 else if (CONST_INT_P (op1))
29289 {
29290 if (GET_CODE (op0) == SIGN_EXTEND)
29291 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
29292 == INTVAL (op1);
29293 else
29294 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
29295 }
29296
29297 if (is_mulwiden)
29298 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
29299 }
29300
29301 *total = (cost->mult_init[MODE_INDEX (mode)]
29302 + nbits * cost->mult_bit
29303 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
29304
29305 return true;
29306 }
29307
29308 case DIV:
29309 case UDIV:
29310 case MOD:
29311 case UMOD:
29312 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29313 /* ??? SSE cost should be used here. */
29314 *total = cost->fdiv;
29315 else if (X87_FLOAT_MODE_P (mode))
29316 *total = cost->fdiv;
29317 else if (FLOAT_MODE_P (mode))
29318 /* ??? SSE vector cost should be used here. */
29319 *total = cost->fdiv;
29320 else
29321 *total = cost->divide[MODE_INDEX (mode)];
29322 return false;
29323
29324 case PLUS:
29325 if (GET_MODE_CLASS (mode) == MODE_INT
29326 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
29327 {
29328 if (GET_CODE (XEXP (x, 0)) == PLUS
29329 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
29330 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
29331 && CONSTANT_P (XEXP (x, 1)))
29332 {
29333 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
29334 if (val == 2 || val == 4 || val == 8)
29335 {
29336 *total = cost->lea;
29337 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29338 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
29339 outer_code, speed);
29340 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29341 return true;
29342 }
29343 }
29344 else if (GET_CODE (XEXP (x, 0)) == MULT
29345 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
29346 {
29347 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
29348 if (val == 2 || val == 4 || val == 8)
29349 {
29350 *total = cost->lea;
29351 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29352 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29353 return true;
29354 }
29355 }
29356 else if (GET_CODE (XEXP (x, 0)) == PLUS)
29357 {
29358 *total = cost->lea;
29359 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29360 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29361 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29362 return true;
29363 }
29364 }
29365 /* FALLTHRU */
29366
29367 case MINUS:
29368 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29369 {
29370 /* ??? SSE cost should be used here. */
29371 *total = cost->fadd;
29372 return false;
29373 }
29374 else if (X87_FLOAT_MODE_P (mode))
29375 {
29376 *total = cost->fadd;
29377 return false;
29378 }
29379 else if (FLOAT_MODE_P (mode))
29380 {
29381 /* ??? SSE vector cost should be used here. */
29382 *total = cost->fadd;
29383 return false;
29384 }
29385 /* FALLTHRU */
29386
29387 case AND:
29388 case IOR:
29389 case XOR:
29390 if (!TARGET_64BIT && mode == DImode)
29391 {
29392 *total = (cost->add * 2
29393 + (rtx_cost (XEXP (x, 0), outer_code, speed)
29394 << (GET_MODE (XEXP (x, 0)) != DImode))
29395 + (rtx_cost (XEXP (x, 1), outer_code, speed)
29396 << (GET_MODE (XEXP (x, 1)) != DImode)));
29397 return true;
29398 }
29399 /* FALLTHRU */
29400
29401 case NEG:
29402 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29403 {
29404 /* ??? SSE cost should be used here. */
29405 *total = cost->fchs;
29406 return false;
29407 }
29408 else if (X87_FLOAT_MODE_P (mode))
29409 {
29410 *total = cost->fchs;
29411 return false;
29412 }
29413 else if (FLOAT_MODE_P (mode))
29414 {
29415 /* ??? SSE vector cost should be used here. */
29416 *total = cost->fchs;
29417 return false;
29418 }
29419 /* FALLTHRU */
29420
29421 case NOT:
29422 if (!TARGET_64BIT && mode == DImode)
29423 *total = cost->add * 2;
29424 else
29425 *total = cost->add;
29426 return false;
29427
29428 case COMPARE:
29429 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29430 && XEXP (XEXP (x, 0), 1) == const1_rtx
29431 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29432 && XEXP (x, 1) == const0_rtx)
29433 {
29434 /* This kind of construct is implemented using test[bwl].
29435 Treat it as if we had an AND. */
29436 *total = (cost->add
29437 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
29438 + rtx_cost (const1_rtx, outer_code, speed));
29439 return true;
29440 }
29441 return false;
29442
29443 case FLOAT_EXTEND:
29444 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29445 *total = 0;
29446 return false;
29447
29448 case ABS:
29449 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29450 /* ??? SSE cost should be used here. */
29451 *total = cost->fabs;
29452 else if (X87_FLOAT_MODE_P (mode))
29453 *total = cost->fabs;
29454 else if (FLOAT_MODE_P (mode))
29455 /* ??? SSE vector cost should be used here. */
29456 *total = cost->fabs;
29457 return false;
29458
29459 case SQRT:
29460 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29461 /* ??? SSE cost should be used here. */
29462 *total = cost->fsqrt;
29463 else if (X87_FLOAT_MODE_P (mode))
29464 *total = cost->fsqrt;
29465 else if (FLOAT_MODE_P (mode))
29466 /* ??? SSE vector cost should be used here. */
29467 *total = cost->fsqrt;
29468 return false;
29469
29470 case UNSPEC:
29471 if (XINT (x, 1) == UNSPEC_TP)
29472 *total = 0;
29473 return false;
29474
29475 case VEC_SELECT:
29476 case VEC_CONCAT:
29477 case VEC_MERGE:
29478 case VEC_DUPLICATE:
29479 /* ??? Assume all of these vector manipulation patterns are
29480 recognizable, in which case they all pretty much have the
29481 same cost. */
29482 *total = COSTS_N_INSNS (1);
29483 return true;
29484
29485 default:
29486 return false;
29487 }
29488 }
29489
29490 #if TARGET_MACHO
29491
29492 static int current_machopic_label_num;
29493
29494 /* Given a symbol name and its associated stub, write out the
29495 definition of the stub. */
29496
29497 void
29498 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29499 {
29500 unsigned int length;
29501 char *binder_name, *symbol_name, lazy_ptr_name[32];
29502 int label = ++current_machopic_label_num;
29503
29504 /* For 64-bit we shouldn't get here. */
29505 gcc_assert (!TARGET_64BIT);
29506
29507 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29508 symb = targetm.strip_name_encoding (symb);
29509
29510 length = strlen (stub);
29511 binder_name = XALLOCAVEC (char, length + 32);
29512 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29513
29514 length = strlen (symb);
29515 symbol_name = XALLOCAVEC (char, length + 32);
29516 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29517
29518 sprintf (lazy_ptr_name, "L%d$lz", label);
29519
29520 if (MACHOPIC_ATT_STUB)
29521 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29522 else if (MACHOPIC_PURE)
29523 {
29524 if (TARGET_DEEP_BRANCH_PREDICTION)
29525 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29526 else
29527 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
29528 }
29529 else
29530 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29531
29532 fprintf (file, "%s:\n", stub);
29533 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29534
29535 if (MACHOPIC_ATT_STUB)
29536 {
29537 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29538 }
29539 else if (MACHOPIC_PURE)
29540 {
29541 /* PIC stub. */
29542 if (TARGET_DEEP_BRANCH_PREDICTION)
29543 {
29544 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29545 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29546 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29547 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
29548 }
29549 else
29550 {
29551 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
29552 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
29553 fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
29554 }
29555 fprintf (file, "\tjmp\t*%%ecx\n");
29556 }
29557 else
29558 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29559
29560 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29561 it needs no stub-binding-helper. */
29562 if (MACHOPIC_ATT_STUB)
29563 return;
29564
29565 fprintf (file, "%s:\n", binder_name);
29566
29567 if (MACHOPIC_PURE)
29568 {
29569 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29570 fprintf (file, "\tpushl\t%%ecx\n");
29571 }
29572 else
29573 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29574
29575 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29576
29577 /* N.B. Keep the correspondence of these
29578 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29579 old-pic/new-pic/non-pic stubs; altering this will break
29580 compatibility with existing dylibs. */
29581 if (MACHOPIC_PURE)
29582 {
29583 /* PIC stubs. */
29584 if (TARGET_DEEP_BRANCH_PREDICTION)
29585 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29586 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29587 else
29588 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
29589 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29590 }
29591 else
29592 /* 16-byte -mdynamic-no-pic stub. */
29593 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
29594
29595 fprintf (file, "%s:\n", lazy_ptr_name);
29596 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29597 fprintf (file, ASM_LONG "%s\n", binder_name);
29598 }
29599 #endif /* TARGET_MACHO */
29600
29601 /* Order the registers for register allocator. */
29602
29603 void
29604 x86_order_regs_for_local_alloc (void)
29605 {
29606 int pos = 0;
29607 int i;
29608
29609 /* First allocate the local general purpose registers. */
29610 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29611 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29612 reg_alloc_order [pos++] = i;
29613
29614 /* Global general purpose registers. */
29615 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29616 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29617 reg_alloc_order [pos++] = i;
29618
29619 /* x87 registers come first in case we are doing FP math
29620 using them. */
29621 if (!TARGET_SSE_MATH)
29622 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29623 reg_alloc_order [pos++] = i;
29624
29625 /* SSE registers. */
29626 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29627 reg_alloc_order [pos++] = i;
29628 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29629 reg_alloc_order [pos++] = i;
29630
29631 /* x87 registers. */
29632 if (TARGET_SSE_MATH)
29633 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29634 reg_alloc_order [pos++] = i;
29635
29636 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29637 reg_alloc_order [pos++] = i;
29638
29639 /* Initialize the rest of the array as we do not allocate some registers
29640 at all. */
29641 while (pos < FIRST_PSEUDO_REGISTER)
29642 reg_alloc_order [pos++] = 0;
29643 }
29644
29645 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29646 in struct attribute_spec handler. */
29647 static tree
29648 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29649 tree args,
29650 int flags ATTRIBUTE_UNUSED,
29651 bool *no_add_attrs)
29652 {
29653 if (TREE_CODE (*node) != FUNCTION_TYPE
29654 && TREE_CODE (*node) != METHOD_TYPE
29655 && TREE_CODE (*node) != FIELD_DECL
29656 && TREE_CODE (*node) != TYPE_DECL)
29657 {
29658 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29659 name);
29660 *no_add_attrs = true;
29661 return NULL_TREE;
29662 }
29663 if (TARGET_64BIT)
29664 {
29665 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29666 name);
29667 *no_add_attrs = true;
29668 return NULL_TREE;
29669 }
29670 if (is_attribute_p ("callee_pop_aggregate_return", name))
29671 {
29672 tree cst;
29673
29674 cst = TREE_VALUE (args);
29675 if (TREE_CODE (cst) != INTEGER_CST)
29676 {
29677 warning (OPT_Wattributes,
29678 "%qE attribute requires an integer constant argument",
29679 name);
29680 *no_add_attrs = true;
29681 }
29682 else if (compare_tree_int (cst, 0) != 0
29683 && compare_tree_int (cst, 1) != 0)
29684 {
29685 warning (OPT_Wattributes,
29686 "argument to %qE attribute is neither zero, nor one",
29687 name);
29688 *no_add_attrs = true;
29689 }
29690
29691 return NULL_TREE;
29692 }
29693
29694 return NULL_TREE;
29695 }
29696
29697 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
29698 struct attribute_spec.handler. */
29699 static tree
29700 ix86_handle_abi_attribute (tree *node, tree name,
29701 tree args ATTRIBUTE_UNUSED,
29702 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29703 {
29704 if (TREE_CODE (*node) != FUNCTION_TYPE
29705 && TREE_CODE (*node) != METHOD_TYPE
29706 && TREE_CODE (*node) != FIELD_DECL
29707 && TREE_CODE (*node) != TYPE_DECL)
29708 {
29709 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29710 name);
29711 *no_add_attrs = true;
29712 return NULL_TREE;
29713 }
29714 if (!TARGET_64BIT)
29715 {
29716 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29717 name);
29718 *no_add_attrs = true;
29719 return NULL_TREE;
29720 }
29721
29722 /* Can combine regparm with all attributes but fastcall. */
29723 if (is_attribute_p ("ms_abi", name))
29724 {
29725 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29726 {
29727 error ("ms_abi and sysv_abi attributes are not compatible");
29728 }
29729
29730 return NULL_TREE;
29731 }
29732 else if (is_attribute_p ("sysv_abi", name))
29733 {
29734 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29735 {
29736 error ("ms_abi and sysv_abi attributes are not compatible");
29737 }
29738
29739 return NULL_TREE;
29740 }
29741
29742 return NULL_TREE;
29743 }
29744
29745 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29746 struct attribute_spec.handler. */
29747 static tree
29748 ix86_handle_struct_attribute (tree *node, tree name,
29749 tree args ATTRIBUTE_UNUSED,
29750 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29751 {
29752 tree *type = NULL;
29753 if (DECL_P (*node))
29754 {
29755 if (TREE_CODE (*node) == TYPE_DECL)
29756 type = &TREE_TYPE (*node);
29757 }
29758 else
29759 type = node;
29760
29761 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29762 || TREE_CODE (*type) == UNION_TYPE)))
29763 {
29764 warning (OPT_Wattributes, "%qE attribute ignored",
29765 name);
29766 *no_add_attrs = true;
29767 }
29768
29769 else if ((is_attribute_p ("ms_struct", name)
29770 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29771 || ((is_attribute_p ("gcc_struct", name)
29772 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29773 {
29774 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29775 name);
29776 *no_add_attrs = true;
29777 }
29778
29779 return NULL_TREE;
29780 }
29781
29782 static tree
29783 ix86_handle_fndecl_attribute (tree *node, tree name,
29784 tree args ATTRIBUTE_UNUSED,
29785 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29786 {
29787 if (TREE_CODE (*node) != FUNCTION_DECL)
29788 {
29789 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29790 name);
29791 *no_add_attrs = true;
29792 }
29793 return NULL_TREE;
29794 }
29795
29796 static bool
29797 ix86_ms_bitfield_layout_p (const_tree record_type)
29798 {
29799 return ((TARGET_MS_BITFIELD_LAYOUT
29800 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29801 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29802 }
29803
29804 /* Returns an expression indicating where the this parameter is
29805 located on entry to the FUNCTION. */
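/* For example (illustrative of the cases handled below): on a 64-bit SYSV
   target THIS arrives in %rdi, or in %rsi when the return value is an
   aggregate returned in memory; for a 32-bit fastcall method it arrives in
   %ecx (or %edx with an aggregate return); and with the default stack
   convention it lives at 4(%esp), or at 8(%esp) past the hidden return
   pointer.  */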
29806
29807 static rtx
29808 x86_this_parameter (tree function)
29809 {
29810 tree type = TREE_TYPE (function);
29811 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29812 int nregs;
29813
29814 if (TARGET_64BIT)
29815 {
29816 const int *parm_regs;
29817
29818 if (ix86_function_type_abi (type) == MS_ABI)
29819 parm_regs = x86_64_ms_abi_int_parameter_registers;
29820 else
29821 parm_regs = x86_64_int_parameter_registers;
29822 return gen_rtx_REG (DImode, parm_regs[aggr]);
29823 }
29824
29825 nregs = ix86_function_regparm (type, function);
29826
29827 if (nregs > 0 && !stdarg_p (type))
29828 {
29829 int regno;
29830 unsigned int ccvt = ix86_get_callcvt (type);
29831
29832 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29833 regno = aggr ? DX_REG : CX_REG;
29834 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29835 {
29836 regno = CX_REG;
29837 if (aggr)
29838 return gen_rtx_MEM (SImode,
29839 plus_constant (stack_pointer_rtx, 4));
29840 }
29841 else
29842 {
29843 regno = AX_REG;
29844 if (aggr)
29845 {
29846 regno = DX_REG;
29847 if (nregs == 1)
29848 return gen_rtx_MEM (SImode,
29849 plus_constant (stack_pointer_rtx, 4));
29850 }
29851 }
29852 return gen_rtx_REG (SImode, regno);
29853 }
29854
29855 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29856 }
29857
29858 /* Determine whether x86_output_mi_thunk can succeed. */
29859
29860 static bool
29861 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29862 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29863 HOST_WIDE_INT vcall_offset, const_tree function)
29864 {
29865 /* 64-bit can handle anything. */
29866 if (TARGET_64BIT)
29867 return true;
29868
29869 /* For 32-bit, everything's fine if we have one free register. */
29870 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29871 return true;
29872
29873 /* Need a free register for vcall_offset. */
29874 if (vcall_offset)
29875 return false;
29876
29877 /* Need a free register for GOT references. */
29878 if (flag_pic && !targetm.binds_local_p (function))
29879 return false;
29880
29881 /* Otherwise ok. */
29882 return true;
29883 }
29884
29885 /* Output the assembler code for a thunk function. THUNK_DECL is the
29886 declaration for the thunk function itself, FUNCTION is the decl for
29887 the target function. DELTA is an immediate constant offset to be
29888 added to THIS. If VCALL_OFFSET is nonzero, the word at
29889 *(*this + vcall_offset) should be added to THIS. */
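/* For a simple 32-bit non-PIC thunk with a small positive DELTA and no
   VCALL_OFFSET, the output boils down to roughly the following sketch
   (DELTA and TARGET stand in for the actual values):

	addl	$DELTA, 4(%esp)
	jmp	TARGET
   */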
29890
29891 static void
29892 x86_output_mi_thunk (FILE *file,
29893 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29894 HOST_WIDE_INT vcall_offset, tree function)
29895 {
29896 rtx xops[3];
29897 rtx this_param = x86_this_parameter (function);
29898 rtx this_reg, tmp;
29899
29900 /* Make sure unwind info is emitted for the thunk if needed. */
29901 final_start_function (emit_barrier (), file, 1);
29902
29903 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29904 pull it in now and let DELTA benefit. */
29905 if (REG_P (this_param))
29906 this_reg = this_param;
29907 else if (vcall_offset)
29908 {
29909 /* Put the this parameter into %eax. */
29910 xops[0] = this_param;
29911 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
29912 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29913 }
29914 else
29915 this_reg = NULL_RTX;
29916
29917 /* Adjust the this parameter by a fixed constant. */
29918 if (delta)
29919 {
29920 xops[0] = GEN_INT (delta);
29921 xops[1] = this_reg ? this_reg : this_param;
29922 if (TARGET_64BIT)
29923 {
29924 if (!x86_64_general_operand (xops[0], DImode))
29925 {
29926 tmp = gen_rtx_REG (DImode, R10_REG);
29927 xops[1] = tmp;
29928 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
29929 xops[0] = tmp;
29930 xops[1] = this_param;
29931 }
29932 if (x86_maybe_negate_const_int (&xops[0], DImode))
29933 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
29934 else
29935 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
29936 }
29937 else if (x86_maybe_negate_const_int (&xops[0], SImode))
29938 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
29939 else
29940 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
29941 }
29942
29943 /* Adjust the this parameter by a value stored in the vtable. */
29944 if (vcall_offset)
29945 {
29946 if (TARGET_64BIT)
29947 tmp = gen_rtx_REG (DImode, R10_REG);
29948 else
29949 {
29950 int tmp_regno = CX_REG;
29951 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
29952 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
29953 tmp_regno = AX_REG;
29954 tmp = gen_rtx_REG (SImode, tmp_regno);
29955 }
29956
29957 xops[0] = gen_rtx_MEM (Pmode, this_reg);
29958 xops[1] = tmp;
29959 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29960
29961 /* Adjust the this parameter. */
29962 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
29963 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
29964 {
29965 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
29966 xops[0] = GEN_INT (vcall_offset);
29967 xops[1] = tmp2;
29968 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
29969 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
29970 }
29971 xops[1] = this_reg;
29972 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
29973 }
29974
29975 /* If necessary, drop THIS back to its stack slot. */
29976 if (this_reg && this_reg != this_param)
29977 {
29978 xops[0] = this_reg;
29979 xops[1] = this_param;
29980 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29981 }
29982
29983 xops[0] = XEXP (DECL_RTL (function), 0);
29984 if (TARGET_64BIT)
29985 {
29986 if (!flag_pic || targetm.binds_local_p (function)
29987 || DEFAULT_ABI == MS_ABI)
29988 output_asm_insn ("jmp\t%P0", xops);
29989 /* All thunks should be in the same object as their target,
29990 and thus binds_local_p should be true. */
29991 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
29992 gcc_unreachable ();
29993 else
29994 {
29995 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
29996 tmp = gen_rtx_CONST (Pmode, tmp);
29997 tmp = gen_rtx_MEM (QImode, tmp);
29998 xops[0] = tmp;
29999 output_asm_insn ("jmp\t%A0", xops);
30000 }
30001 }
30002 else
30003 {
30004 if (!flag_pic || targetm.binds_local_p (function))
30005 output_asm_insn ("jmp\t%P0", xops);
30006 else
30007 #if TARGET_MACHO
30008 if (TARGET_MACHO)
30009 {
30010 rtx sym_ref = XEXP (DECL_RTL (function), 0);
30011 if (TARGET_MACHO_BRANCH_ISLANDS)
30012 sym_ref = (gen_rtx_SYMBOL_REF
30013 (Pmode,
30014 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
30015 tmp = gen_rtx_MEM (QImode, sym_ref);
30016 xops[0] = tmp;
30017 output_asm_insn ("jmp\t%0", xops);
30018 }
30019 else
30020 #endif /* TARGET_MACHO */
30021 {
30022 tmp = gen_rtx_REG (SImode, CX_REG);
30023 output_set_got (tmp, NULL_RTX);
30024
30025 xops[1] = tmp;
30026 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
30027 output_asm_insn ("jmp\t{*}%1", xops);
30028 }
30029 }
30030 final_end_function ();
30031 }
30032
30033 static void
30034 x86_file_start (void)
30035 {
30036 default_file_start ();
30037 #if TARGET_MACHO
30038 darwin_file_start ();
30039 #endif
30040 if (X86_FILE_START_VERSION_DIRECTIVE)
30041 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
30042 if (X86_FILE_START_FLTUSED)
30043 fputs ("\t.global\t__fltused\n", asm_out_file);
30044 if (ix86_asm_dialect == ASM_INTEL)
30045 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
30046 }
30047
30048 int
30049 x86_field_alignment (tree field, int computed)
30050 {
30051 enum machine_mode mode;
30052 tree type = TREE_TYPE (field);
30053
30054 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
30055 return computed;
30056 mode = TYPE_MODE (strip_array_types (type));
30057 if (mode == DFmode || mode == DCmode
30058 || GET_MODE_CLASS (mode) == MODE_INT
30059 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
30060 return MIN (32, computed);
30061 return computed;
30062 }
30063
30064 /* Output assembler code to FILE to increment profiler label # LABELNO
30065 for profiling a function entry. */
30066 void
30067 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
30068 {
30069 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
30070 : MCOUNT_NAME);
30071
30072 if (TARGET_64BIT)
30073 {
30074 #ifndef NO_PROFILE_COUNTERS
30075 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
30076 #endif
30077
30078 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
30079 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
30080 else
30081 fprintf (file, "\tcall\t%s\n", mcount_name);
30082 }
30083 else if (flag_pic)
30084 {
30085 #ifndef NO_PROFILE_COUNTERS
30086 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
30087 LPREFIX, labelno);
30088 #endif
30089 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
30090 }
30091 else
30092 {
30093 #ifndef NO_PROFILE_COUNTERS
30094 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
30095 LPREFIX, labelno);
30096 #endif
30097 fprintf (file, "\tcall\t%s\n", mcount_name);
30098 }
30099 }
30100
30101 /* We don't have exact information about the insn sizes, but we may assume
30102 quite safely that we are informed about all 1 byte insns and memory
30103 address sizes. This is enough to eliminate unnecessary padding in
30104 99% of cases. */
30105
30106 static int
30107 min_insn_size (rtx insn)
30108 {
30109 int l = 0, len;
30110
30111 if (!INSN_P (insn) || !active_insn_p (insn))
30112 return 0;
30113
30114 /* Discard alignments we've emitted and jump tables. */
30115 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
30116 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
30117 return 0;
30118 if (JUMP_TABLE_DATA_P (insn))
30119 return 0;
30120
30121 /* Important case - calls are always 5 bytes.
30122 It is common to have many calls in a row. */
30123 if (CALL_P (insn)
30124 && symbolic_reference_mentioned_p (PATTERN (insn))
30125 && !SIBLING_CALL_P (insn))
30126 return 5;
30127 len = get_attr_length (insn);
30128 if (len <= 1)
30129 return 1;
30130
30131 /* For normal instructions we rely on get_attr_length being exact,
30132 with a few exceptions. */
30133 if (!JUMP_P (insn))
30134 {
30135 enum attr_type type = get_attr_type (insn);
30136
30137 switch (type)
30138 {
30139 case TYPE_MULTI:
30140 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
30141 || asm_noperands (PATTERN (insn)) >= 0)
30142 return 0;
30143 break;
30144 case TYPE_OTHER:
30145 case TYPE_FCMP:
30146 break;
30147 default:
30148 /* Otherwise trust get_attr_length. */
30149 return len;
30150 }
30151
30152 l = get_attr_length_address (insn);
30153 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
30154 l = 4;
30155 }
30156 if (l)
30157 return 1+l;
30158 else
30159 return 2;
30160 }
30161
30162 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30163
30164 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
30165 window. */
30166
30167 static void
30168 ix86_avoid_jump_mispredicts (void)
30169 {
30170 rtx insn, start = get_insns ();
30171 int nbytes = 0, njumps = 0;
30172 int isjump = 0;
30173
30174   /* Look for all minimal intervals of instructions containing 4 jumps.
30175      The intervals are bounded by START and INSN.  NBYTES is the total
30176      size of the instructions in the interval, including INSN but not
30177      including START.  When NBYTES is smaller than 16, it is possible
30178      that START and INSN end up in the same 16 byte page.
30179 
30180      The smallest offset in the page at which INSN can start is the case
30181      where START ends at offset 0.  The offset of INSN is then
30182      NBYTES - sizeof (INSN).  We add a p2align to the 16 byte window with
30183      maxskip 15 - NBYTES + sizeof (INSN).  */
30184 for (insn = start; insn; insn = NEXT_INSN (insn))
30185 {
30186 int min_size;
30187
30188 if (LABEL_P (insn))
30189 {
30190 int align = label_to_alignment (insn);
30191 int max_skip = label_to_max_skip (insn);
30192
30193 if (max_skip > 15)
30194 max_skip = 15;
30195 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
30196 already in the current 16 byte page, because otherwise
30197 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
30198 bytes to reach 16 byte boundary. */
30199 if (align <= 0
30200 || (align <= 3 && max_skip != (1 << align) - 1))
30201 max_skip = 0;
30202 if (dump_file)
30203 fprintf (dump_file, "Label %i with max_skip %i\n",
30204 INSN_UID (insn), max_skip);
30205 if (max_skip)
30206 {
30207 while (nbytes + max_skip >= 16)
30208 {
30209 start = NEXT_INSN (start);
30210 if ((JUMP_P (start)
30211 && GET_CODE (PATTERN (start)) != ADDR_VEC
30212 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30213 || CALL_P (start))
30214 njumps--, isjump = 1;
30215 else
30216 isjump = 0;
30217 nbytes -= min_insn_size (start);
30218 }
30219 }
30220 continue;
30221 }
30222
30223 min_size = min_insn_size (insn);
30224 nbytes += min_size;
30225 if (dump_file)
30226 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
30227 INSN_UID (insn), min_size);
30228 if ((JUMP_P (insn)
30229 && GET_CODE (PATTERN (insn)) != ADDR_VEC
30230 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
30231 || CALL_P (insn))
30232 njumps++;
30233 else
30234 continue;
30235
30236 while (njumps > 3)
30237 {
30238 start = NEXT_INSN (start);
30239 if ((JUMP_P (start)
30240 && GET_CODE (PATTERN (start)) != ADDR_VEC
30241 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30242 || CALL_P (start))
30243 njumps--, isjump = 1;
30244 else
30245 isjump = 0;
30246 nbytes -= min_insn_size (start);
30247 }
30248 gcc_assert (njumps >= 0);
30249 if (dump_file)
30250 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
30251 INSN_UID (start), INSN_UID (insn), nbytes);
30252
30253 if (njumps == 3 && isjump && nbytes < 16)
30254 {
30255 int padsize = 15 - nbytes + min_insn_size (insn);
30256
30257 if (dump_file)
30258 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
30259 INSN_UID (insn), padsize);
30260 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
30261 }
30262 }
30263 }
30264 #endif
30265
30266 /* AMD Athlon works faster when RET is not the destination of a
30267    conditional jump or directly preceded by another jump instruction.
30268    We avoid the penalty by inserting a NOP just before the RET
30269    instruction in such cases.  */
30270 static void
30271 ix86_pad_returns (void)
30272 {
30273 edge e;
30274 edge_iterator ei;
30275
30276 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30277 {
30278 basic_block bb = e->src;
30279 rtx ret = BB_END (bb);
30280 rtx prev;
30281 bool replace = false;
30282
30283 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
30284 || optimize_bb_for_size_p (bb))
30285 continue;
30286 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
30287 if (active_insn_p (prev) || LABEL_P (prev))
30288 break;
30289 if (prev && LABEL_P (prev))
30290 {
30291 edge e;
30292 edge_iterator ei;
30293
30294 FOR_EACH_EDGE (e, ei, bb->preds)
30295 if (EDGE_FREQUENCY (e) && e->src->index >= 0
30296 && !(e->flags & EDGE_FALLTHRU))
30297 replace = true;
30298 }
30299 if (!replace)
30300 {
30301 prev = prev_active_insn (ret);
30302 if (prev
30303 && ((JUMP_P (prev) && any_condjump_p (prev))
30304 || CALL_P (prev)))
30305 replace = true;
30306 	  /* Empty functions get a branch mispredict even when
30307 	     the jump destination is not visible to us.  */
30308 if (!prev && !optimize_function_for_size_p (cfun))
30309 replace = true;
30310 }
30311 if (replace)
30312 {
30313 emit_jump_insn_before (gen_return_internal_long (), ret);
30314 delete_insn (ret);
30315 }
30316 }
30317 }
30318
30319 /* Count the minimum number of instructions in BB. Return 4 if the
30320 number of instructions >= 4. */
30321
30322 static int
30323 ix86_count_insn_bb (basic_block bb)
30324 {
30325 rtx insn;
30326 int insn_count = 0;
30327
30328 /* Count number of instructions in this block. Return 4 if the number
30329 of instructions >= 4. */
30330 FOR_BB_INSNS (bb, insn)
30331 {
30332       /* This only happens in exit blocks.  */
30333 if (JUMP_P (insn)
30334 && GET_CODE (PATTERN (insn)) == RETURN)
30335 break;
30336
30337 if (NONDEBUG_INSN_P (insn)
30338 && GET_CODE (PATTERN (insn)) != USE
30339 && GET_CODE (PATTERN (insn)) != CLOBBER)
30340 {
30341 insn_count++;
30342 if (insn_count >= 4)
30343 return insn_count;
30344 }
30345 }
30346
30347 return insn_count;
30348 }
30349
30350
30351 /* Count the minimum number of instructions in a code path ending in BB.
30352    Return 4 if the number of instructions >= 4.  */
30353
30354 static int
30355 ix86_count_insn (basic_block bb)
30356 {
30357 edge e;
30358 edge_iterator ei;
30359 int min_prev_count;
30360
30361 /* Only bother counting instructions along paths with no
30362 more than 2 basic blocks between entry and exit. Given
30363 that BB has an edge to exit, determine if a predecessor
30364 of BB has an edge from entry. If so, compute the number
30365 of instructions in the predecessor block. If there
30366 happen to be multiple such blocks, compute the minimum. */
30367 min_prev_count = 4;
30368 FOR_EACH_EDGE (e, ei, bb->preds)
30369 {
30370 edge prev_e;
30371 edge_iterator prev_ei;
30372
30373 if (e->src == ENTRY_BLOCK_PTR)
30374 {
30375 min_prev_count = 0;
30376 break;
30377 }
30378 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
30379 {
30380 if (prev_e->src == ENTRY_BLOCK_PTR)
30381 {
30382 int count = ix86_count_insn_bb (e->src);
30383 if (count < min_prev_count)
30384 min_prev_count = count;
30385 break;
30386 }
30387 }
30388 }
30389
30390 if (min_prev_count < 4)
30391 min_prev_count += ix86_count_insn_bb (bb);
30392
30393 return min_prev_count;
30394 }
30395
30396 /* Pad short function to 4 instructions.   */
30397
30398 static void
30399 ix86_pad_short_function (void)
30400 {
30401 edge e;
30402 edge_iterator ei;
30403
30404 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30405 {
30406 rtx ret = BB_END (e->src);
30407 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30408 {
30409 int insn_count = ix86_count_insn (e->src);
30410
30411 /* Pad short function. */
30412 if (insn_count < 4)
30413 {
30414 rtx insn = ret;
30415
30416 /* Find epilogue. */
30417 while (insn
30418 && (!NOTE_P (insn)
30419 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30420 insn = PREV_INSN (insn);
30421
30422 if (!insn)
30423 insn = ret;
30424
30425 /* Two NOPs count as one instruction. */
30426 insn_count = 2 * (4 - insn_count);
30427 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
30428 }
30429 }
30430 }
30431 }
30432
30433 /* Implement machine specific optimizations. We implement padding of returns
30434 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
30435 static void
30436 ix86_reorg (void)
30437 {
30438 /* We are freeing block_for_insn in the toplev to keep compatibility
30439 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30440 compute_bb_for_insn ();
30441
30442 if (optimize && optimize_function_for_speed_p (cfun))
30443 {
30444 if (TARGET_PAD_SHORT_FUNCTION)
30445 ix86_pad_short_function ();
30446 else if (TARGET_PAD_RETURNS)
30447 ix86_pad_returns ();
30448 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30449 if (TARGET_FOUR_JUMP_LIMIT)
30450 ix86_avoid_jump_mispredicts ();
30451 #endif
30452 }
30453
30454 /* Run the vzeroupper optimization if needed. */
30455 if (TARGET_VZEROUPPER)
30456 move_or_delete_vzeroupper ();
30457 }
30458
30459 /* Return nonzero when a QImode register that must be represented via a REX
30460    prefix is used.  */
30461 bool
30462 x86_extended_QIreg_mentioned_p (rtx insn)
30463 {
30464 int i;
30465 extract_insn_cached (insn);
30466 for (i = 0; i < recog_data.n_operands; i++)
30467 if (REG_P (recog_data.operand[i])
30468 && REGNO (recog_data.operand[i]) > BX_REG)
30469 return true;
30470 return false;
30471 }
30472
30473 /* Return nonzero when P points to a register encoded via a REX prefix.
30474    Called via for_each_rtx.  */
30475 static int
30476 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30477 {
30478 unsigned int regno;
30479 if (!REG_P (*p))
30480 return 0;
30481 regno = REGNO (*p);
30482 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30483 }
30484
30485 /* Return true when INSN mentions register that must be encoded using REX
30486 prefix. */
30487 bool
30488 x86_extended_reg_mentioned_p (rtx insn)
30489 {
30490 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30491 extended_reg_mentioned_1, NULL);
30492 }
30493
30494 /* If profitable, negate (without causing overflow) integer constant
30495 of mode MODE at location LOC. Return true in this case. */
30496 bool
30497 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30498 {
30499 HOST_WIDE_INT val;
30500
30501 if (!CONST_INT_P (*loc))
30502 return false;
30503
30504 switch (mode)
30505 {
30506 case DImode:
30507 /* DImode x86_64 constants must fit in 32 bits. */
30508 gcc_assert (x86_64_immediate_operand (*loc, mode));
30509
30510 mode = SImode;
30511 break;
30512
30513 case SImode:
30514 case HImode:
30515 case QImode:
30516 break;
30517
30518 default:
30519 gcc_unreachable ();
30520 }
30521
30522 /* Avoid overflows. */
30523 if (mode_signbit_p (mode, *loc))
30524 return false;
30525
30526 val = INTVAL (*loc);
30527
30528 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
30529 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
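  /* The signed 8-bit immediate range is -128..127, so -128 can be
     encoded in a single byte while 128 cannot; hence 128 is negated
     while -128 is left alone even though it is negative.  */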
30530 if ((val < 0 && val != -128)
30531 || val == 128)
30532 {
30533 *loc = GEN_INT (-val);
30534 return true;
30535 }
30536
30537 return false;
30538 }
30539
30540 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30541 optabs would emit if we didn't have TFmode patterns. */
30542
30543 void
30544 x86_emit_floatuns (rtx operands[2])
30545 {
30546 rtx neglab, donelab, i0, i1, f0, in, out;
30547 enum machine_mode mode, inmode;
30548
30549 inmode = GET_MODE (operands[1]);
30550 gcc_assert (inmode == SImode || inmode == DImode);
30551
30552 out = operands[0];
30553 in = force_reg (inmode, operands[1]);
30554 mode = GET_MODE (out);
30555 neglab = gen_label_rtx ();
30556 donelab = gen_label_rtx ();
30557 f0 = gen_reg_rtx (mode);
30558
30559 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30560
30561 expand_float (out, in, 0);
30562
30563 emit_jump_insn (gen_jump (donelab));
30564 emit_barrier ();
30565
30566 emit_label (neglab);
30567
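  /* The input has its high bit set and would not survive a signed
     conversion.  Halve it, folding the discarded low bit back in as a
     sticky bit (i0 = (in >> 1) | (in & 1)) so the final rounding stays
     correct, convert, and then double the result.  */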
30568 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30569 1, OPTAB_DIRECT);
30570 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30571 1, OPTAB_DIRECT);
30572 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30573
30574 expand_float (f0, i0, 0);
30575
30576 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30577
30578 emit_label (donelab);
30579 }
30580 \f
30581 /* AVX does not support 32-byte integer vector operations,
30582 thus the longest vector we are faced with is V16QImode. */
30583 #define MAX_VECT_LEN 16
30584
30585 struct expand_vec_perm_d
30586 {
30587 rtx target, op0, op1;
30588 unsigned char perm[MAX_VECT_LEN];
30589 enum machine_mode vmode;
30590 unsigned char nelt;
30591 bool testing_p;
30592 };
30593
30594 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30595 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30596
30597 /* Get a vector mode of the same size as the original but with elements
30598 twice as wide. This is only guaranteed to apply to integral vectors. */
30599
30600 static inline enum machine_mode
30601 get_mode_wider_vector (enum machine_mode o)
30602 {
30603 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30604 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30605 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30606 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30607 return n;
30608 }
30609
30610 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30611 with all elements equal to VAR. Return true if successful. */
30612
30613 static bool
30614 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30615 rtx target, rtx val)
30616 {
30617 bool ok;
30618
30619 switch (mode)
30620 {
30621 case V2SImode:
30622 case V2SFmode:
30623 if (!mmx_ok)
30624 return false;
30625 /* FALLTHRU */
30626
30627 case V4DFmode:
30628 case V4DImode:
30629 case V8SFmode:
30630 case V8SImode:
30631 case V2DFmode:
30632 case V2DImode:
30633 case V4SFmode:
30634 case V4SImode:
30635 {
30636 rtx insn, dup;
30637
30638 /* First attempt to recognize VAL as-is. */
30639 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30640 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30641 if (recog_memoized (insn) < 0)
30642 {
30643 rtx seq;
30644 /* If that fails, force VAL into a register. */
30645
30646 start_sequence ();
30647 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30648 seq = get_insns ();
30649 end_sequence ();
30650 if (seq)
30651 emit_insn_before (seq, insn);
30652
30653 ok = recog_memoized (insn) >= 0;
30654 gcc_assert (ok);
30655 }
30656 }
30657 return true;
30658
30659 case V4HImode:
30660 if (!mmx_ok)
30661 return false;
30662 if (TARGET_SSE || TARGET_3DNOW_A)
30663 {
30664 rtx x;
30665
30666 val = gen_lowpart (SImode, val);
30667 x = gen_rtx_TRUNCATE (HImode, val);
30668 x = gen_rtx_VEC_DUPLICATE (mode, x);
30669 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30670 return true;
30671 }
30672 goto widen;
30673
30674 case V8QImode:
30675 if (!mmx_ok)
30676 return false;
30677 goto widen;
30678
30679 case V8HImode:
30680 if (TARGET_SSE2)
30681 {
30682 struct expand_vec_perm_d dperm;
30683 rtx tmp1, tmp2;
30684
30685 permute:
30686 memset (&dperm, 0, sizeof (dperm));
30687 dperm.target = target;
30688 dperm.vmode = mode;
30689 dperm.nelt = GET_MODE_NUNITS (mode);
30690 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30691
30692 /* Extend to SImode using a paradoxical SUBREG. */
30693 tmp1 = gen_reg_rtx (SImode);
30694 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30695
30696 /* Insert the SImode value as low element of a V4SImode vector. */
30697 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30698 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30699
30700 ok = (expand_vec_perm_1 (&dperm)
30701 || expand_vec_perm_broadcast_1 (&dperm));
30702 gcc_assert (ok);
30703 return ok;
30704 }
30705 goto widen;
30706
30707 case V16QImode:
30708 if (TARGET_SSE2)
30709 goto permute;
30710 goto widen;
30711
30712 widen:
30713 /* Replicate the value once into the next wider mode and recurse. */
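      /* For example, a V8QImode broadcast of VAL becomes a V4HImode
	 broadcast of (VAL | (VAL << 8)).  */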
30714 {
30715 enum machine_mode smode, wsmode, wvmode;
30716 rtx x;
30717
30718 smode = GET_MODE_INNER (mode);
30719 wvmode = get_mode_wider_vector (mode);
30720 wsmode = GET_MODE_INNER (wvmode);
30721
30722 val = convert_modes (wsmode, smode, val, true);
30723 x = expand_simple_binop (wsmode, ASHIFT, val,
30724 GEN_INT (GET_MODE_BITSIZE (smode)),
30725 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30726 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30727
30728 x = gen_lowpart (wvmode, target);
30729 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30730 gcc_assert (ok);
30731 return ok;
30732 }
30733
30734 case V16HImode:
30735 case V32QImode:
30736 {
30737 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30738 rtx x = gen_reg_rtx (hvmode);
30739
30740 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30741 gcc_assert (ok);
30742
30743 x = gen_rtx_VEC_CONCAT (mode, x, x);
30744 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30745 }
30746 return true;
30747
30748 default:
30749 return false;
30750 }
30751 }
30752
30753 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30754 whose ONE_VAR element is VAR, and other elements are zero. Return true
30755 if successful. */
30756
30757 static bool
30758 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30759 rtx target, rtx var, int one_var)
30760 {
30761 enum machine_mode vsimode;
30762 rtx new_target;
30763 rtx x, tmp;
30764 bool use_vector_set = false;
30765
30766 switch (mode)
30767 {
30768 case V2DImode:
30769 /* For SSE4.1, we normally use vector set. But if the second
30770 element is zero and inter-unit moves are OK, we use movq
30771 instead. */
30772 use_vector_set = (TARGET_64BIT
30773 && TARGET_SSE4_1
30774 && !(TARGET_INTER_UNIT_MOVES
30775 && one_var == 0));
30776 break;
30777 case V16QImode:
30778 case V4SImode:
30779 case V4SFmode:
30780 use_vector_set = TARGET_SSE4_1;
30781 break;
30782 case V8HImode:
30783 use_vector_set = TARGET_SSE2;
30784 break;
30785 case V4HImode:
30786 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30787 break;
30788 case V32QImode:
30789 case V16HImode:
30790 case V8SImode:
30791 case V8SFmode:
30792 case V4DFmode:
30793 use_vector_set = TARGET_AVX;
30794 break;
30795 case V4DImode:
30796 /* Use ix86_expand_vector_set in 64bit mode only. */
30797 use_vector_set = TARGET_AVX && TARGET_64BIT;
30798 break;
30799 default:
30800 break;
30801 }
30802
30803 if (use_vector_set)
30804 {
30805 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30806 var = force_reg (GET_MODE_INNER (mode), var);
30807 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30808 return true;
30809 }
30810
30811 switch (mode)
30812 {
30813 case V2SFmode:
30814 case V2SImode:
30815 if (!mmx_ok)
30816 return false;
30817 /* FALLTHRU */
30818
30819 case V2DFmode:
30820 case V2DImode:
30821 if (one_var != 0)
30822 return false;
30823 var = force_reg (GET_MODE_INNER (mode), var);
30824 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30825 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30826 return true;
30827
30828 case V4SFmode:
30829 case V4SImode:
30830 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30831 new_target = gen_reg_rtx (mode);
30832 else
30833 new_target = target;
30834 var = force_reg (GET_MODE_INNER (mode), var);
30835 x = gen_rtx_VEC_DUPLICATE (mode, var);
30836 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30837 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30838 if (one_var != 0)
30839 {
30840 /* We need to shuffle the value to the correct position, so
30841 create a new pseudo to store the intermediate result. */
30842
30843 /* With SSE2, we can use the integer shuffle insns. */
30844 if (mode != V4SFmode && TARGET_SSE2)
30845 {
30846 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30847 const1_rtx,
30848 GEN_INT (one_var == 1 ? 0 : 1),
30849 GEN_INT (one_var == 2 ? 0 : 1),
30850 GEN_INT (one_var == 3 ? 0 : 1)));
30851 if (target != new_target)
30852 emit_move_insn (target, new_target);
30853 return true;
30854 }
30855
30856 /* Otherwise convert the intermediate result to V4SFmode and
30857 use the SSE1 shuffle instructions. */
30858 if (mode != V4SFmode)
30859 {
30860 tmp = gen_reg_rtx (V4SFmode);
30861 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30862 }
30863 else
30864 tmp = new_target;
30865
30866 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30867 const1_rtx,
30868 GEN_INT (one_var == 1 ? 0 : 1),
30869 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30870 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30871
30872 if (mode != V4SFmode)
30873 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30874 else if (tmp != target)
30875 emit_move_insn (target, tmp);
30876 }
30877 else if (target != new_target)
30878 emit_move_insn (target, new_target);
30879 return true;
30880
30881 case V8HImode:
30882 case V16QImode:
30883 vsimode = V4SImode;
30884 goto widen;
30885 case V4HImode:
30886 case V8QImode:
30887 if (!mmx_ok)
30888 return false;
30889 vsimode = V2SImode;
30890 goto widen;
30891 widen:
30892 if (one_var != 0)
30893 return false;
30894
30895 /* Zero extend the variable element to SImode and recurse. */
30896 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30897
30898 x = gen_reg_rtx (vsimode);
30899 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30900 var, one_var))
30901 gcc_unreachable ();
30902
30903 emit_move_insn (target, gen_lowpart (mode, x));
30904 return true;
30905
30906 default:
30907 return false;
30908 }
30909 }
30910
30911 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30912 consisting of the values in VALS. It is known that all elements
30913 except ONE_VAR are constants. Return true if successful. */
30914
30915 static bool
30916 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30917 rtx target, rtx vals, int one_var)
30918 {
30919 rtx var = XVECEXP (vals, 0, one_var);
30920 enum machine_mode wmode;
30921 rtx const_vec, x;
30922
30923 const_vec = copy_rtx (vals);
30924 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30925 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30926
30927 switch (mode)
30928 {
30929 case V2DFmode:
30930 case V2DImode:
30931 case V2SFmode:
30932 case V2SImode:
30933 /* For the two element vectors, it's just as easy to use
30934 the general case. */
30935 return false;
30936
30937 case V4DImode:
30938 /* Use ix86_expand_vector_set in 64bit mode only. */
30939 if (!TARGET_64BIT)
30940 return false;
30941 case V4DFmode:
30942 case V8SFmode:
30943 case V8SImode:
30944 case V16HImode:
30945 case V32QImode:
30946 case V4SFmode:
30947 case V4SImode:
30948 case V8HImode:
30949 case V4HImode:
30950 break;
30951
30952 case V16QImode:
30953 if (TARGET_SSE4_1)
30954 break;
30955 wmode = V8HImode;
30956 goto widen;
30957 case V8QImode:
30958 wmode = V4HImode;
30959 goto widen;
30960 widen:
30961 /* There's no way to set one QImode entry easily. Combine
30962 the variable value with its adjacent constant value, and
30963 promote to an HImode set. */
30964 x = XVECEXP (vals, 0, one_var ^ 1);
30965 if (one_var & 1)
30966 {
30967 var = convert_modes (HImode, QImode, var, true);
30968 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30969 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30970 x = GEN_INT (INTVAL (x) & 0xff);
30971 }
30972 else
30973 {
30974 var = convert_modes (HImode, QImode, var, true);
30975 x = gen_int_mode (INTVAL (x) << 8, HImode);
30976 }
30977 if (x != const0_rtx)
30978 var = expand_simple_binop (HImode, IOR, var, x, var,
30979 1, OPTAB_LIB_WIDEN);
30980
30981 x = gen_reg_rtx (wmode);
30982 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30983 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30984
30985 emit_move_insn (target, gen_lowpart (mode, x));
30986 return true;
30987
30988 default:
30989 return false;
30990 }
30991
30992 emit_move_insn (target, const_vec);
30993 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30994 return true;
30995 }
30996
30997 /* A subroutine of ix86_expand_vector_init_general. Use vector
30998 concatenate to handle the most general case: all values variable,
30999 and none identical. */
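/* The strategy: pair up the scalar operands, build a two-element vector
   from each pair (recursively, via ix86_expand_vector_init), and then
   VEC_CONCAT the intermediate vectors pairwise until the full-width
   vector is formed.  */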
31000
31001 static void
31002 ix86_expand_vector_init_concat (enum machine_mode mode,
31003 rtx target, rtx *ops, int n)
31004 {
31005 enum machine_mode cmode, hmode = VOIDmode;
31006 rtx first[8], second[4];
31007 rtvec v;
31008 int i, j;
31009
31010 switch (n)
31011 {
31012 case 2:
31013 switch (mode)
31014 {
31015 case V8SImode:
31016 cmode = V4SImode;
31017 break;
31018 case V8SFmode:
31019 cmode = V4SFmode;
31020 break;
31021 case V4DImode:
31022 cmode = V2DImode;
31023 break;
31024 case V4DFmode:
31025 cmode = V2DFmode;
31026 break;
31027 case V4SImode:
31028 cmode = V2SImode;
31029 break;
31030 case V4SFmode:
31031 cmode = V2SFmode;
31032 break;
31033 case V2DImode:
31034 cmode = DImode;
31035 break;
31036 case V2SImode:
31037 cmode = SImode;
31038 break;
31039 case V2DFmode:
31040 cmode = DFmode;
31041 break;
31042 case V2SFmode:
31043 cmode = SFmode;
31044 break;
31045 default:
31046 gcc_unreachable ();
31047 }
31048
31049 if (!register_operand (ops[1], cmode))
31050 ops[1] = force_reg (cmode, ops[1]);
31051 if (!register_operand (ops[0], cmode))
31052 ops[0] = force_reg (cmode, ops[0]);
31053 emit_insn (gen_rtx_SET (VOIDmode, target,
31054 gen_rtx_VEC_CONCAT (mode, ops[0],
31055 ops[1])));
31056 break;
31057
31058 case 4:
31059 switch (mode)
31060 {
31061 case V4DImode:
31062 cmode = V2DImode;
31063 break;
31064 case V4DFmode:
31065 cmode = V2DFmode;
31066 break;
31067 case V4SImode:
31068 cmode = V2SImode;
31069 break;
31070 case V4SFmode:
31071 cmode = V2SFmode;
31072 break;
31073 default:
31074 gcc_unreachable ();
31075 }
31076 goto half;
31077
31078 case 8:
31079 switch (mode)
31080 {
31081 case V8SImode:
31082 cmode = V2SImode;
31083 hmode = V4SImode;
31084 break;
31085 case V8SFmode:
31086 cmode = V2SFmode;
31087 hmode = V4SFmode;
31088 break;
31089 default:
31090 gcc_unreachable ();
31091 }
31092 goto half;
31093
31094 half:
31095 /* FIXME: We process inputs backward to help RA. PR 36222. */
31096 i = n - 1;
31097 j = (n >> 1) - 1;
31098 for (; i > 0; i -= 2, j--)
31099 {
31100 first[j] = gen_reg_rtx (cmode);
31101 v = gen_rtvec (2, ops[i - 1], ops[i]);
31102 ix86_expand_vector_init (false, first[j],
31103 gen_rtx_PARALLEL (cmode, v));
31104 }
31105
31106 n >>= 1;
31107 if (n > 2)
31108 {
31109 gcc_assert (hmode != VOIDmode);
31110 for (i = j = 0; i < n; i += 2, j++)
31111 {
31112 second[j] = gen_reg_rtx (hmode);
31113 ix86_expand_vector_init_concat (hmode, second [j],
31114 &first [i], 2);
31115 }
31116 n >>= 1;
31117 ix86_expand_vector_init_concat (mode, target, second, n);
31118 }
31119 else
31120 ix86_expand_vector_init_concat (mode, target, first, n);
31121 break;
31122
31123 default:
31124 gcc_unreachable ();
31125 }
31126 }
31127
31128 /* A subroutine of ix86_expand_vector_init_general. Use vector
31129 interleave to handle the most general case: all values variable,
31130 and none identical. */
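/* Roughly: each pair of input elements is loaded into the two lowest
   lanes of its own vector, and those vectors are then merged by
   repeatedly interleaving their low halves at doubling element widths
   (for a V8HImode target, first as V4SImode, then as V2DImode) until a
   single full vector remains.  */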
31131
31132 static void
31133 ix86_expand_vector_init_interleave (enum machine_mode mode,
31134 rtx target, rtx *ops, int n)
31135 {
31136 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
31137 int i, j;
31138 rtx op0, op1;
31139 rtx (*gen_load_even) (rtx, rtx, rtx);
31140 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
31141 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
31142
31143 switch (mode)
31144 {
31145 case V8HImode:
31146 gen_load_even = gen_vec_setv8hi;
31147 gen_interleave_first_low = gen_vec_interleave_lowv4si;
31148 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31149 inner_mode = HImode;
31150 first_imode = V4SImode;
31151 second_imode = V2DImode;
31152 third_imode = VOIDmode;
31153 break;
31154 case V16QImode:
31155 gen_load_even = gen_vec_setv16qi;
31156 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
31157 gen_interleave_second_low = gen_vec_interleave_lowv4si;
31158 inner_mode = QImode;
31159 first_imode = V8HImode;
31160 second_imode = V4SImode;
31161 third_imode = V2DImode;
31162 break;
31163 default:
31164 gcc_unreachable ();
31165 }
31166
31167 for (i = 0; i < n; i++)
31168 {
31169       /* Extend the odd element to SImode using a paradoxical SUBREG.  */
31170 op0 = gen_reg_rtx (SImode);
31171 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
31172
31173       /* Insert the SImode value as the low element of a V4SImode vector.  */
31174 op1 = gen_reg_rtx (V4SImode);
31175 op0 = gen_rtx_VEC_MERGE (V4SImode,
31176 gen_rtx_VEC_DUPLICATE (V4SImode,
31177 op0),
31178 CONST0_RTX (V4SImode),
31179 const1_rtx);
31180 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
31181
31182       /* Cast the V4SImode vector back to a vector in the original mode.  */
31183 op0 = gen_reg_rtx (mode);
31184 emit_move_insn (op0, gen_lowpart (mode, op1));
31185
31186       /* Load even elements into the second position.  */
31187 emit_insn (gen_load_even (op0,
31188 force_reg (inner_mode,
31189 ops [i + i + 1]),
31190 const1_rtx));
31191
31192 /* Cast vector to FIRST_IMODE vector. */
31193 ops[i] = gen_reg_rtx (first_imode);
31194 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
31195 }
31196
31197 /* Interleave low FIRST_IMODE vectors. */
31198 for (i = j = 0; i < n; i += 2, j++)
31199 {
31200 op0 = gen_reg_rtx (first_imode);
31201 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
31202
31203 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
31204 ops[j] = gen_reg_rtx (second_imode);
31205 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
31206 }
31207
31208 /* Interleave low SECOND_IMODE vectors. */
31209 switch (second_imode)
31210 {
31211 case V4SImode:
31212 for (i = j = 0; i < n / 2; i += 2, j++)
31213 {
31214 op0 = gen_reg_rtx (second_imode);
31215 emit_insn (gen_interleave_second_low (op0, ops[i],
31216 ops[i + 1]));
31217
31218 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
31219 vector. */
31220 ops[j] = gen_reg_rtx (third_imode);
31221 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
31222 }
31223 second_imode = V2DImode;
31224 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31225 /* FALLTHRU */
31226
31227 case V2DImode:
31228 op0 = gen_reg_rtx (second_imode);
31229 emit_insn (gen_interleave_second_low (op0, ops[0],
31230 ops[1]));
31231
31232       /* Cast the SECOND_IMODE vector back to a vector in the original
31233 	 mode.  */
31234 emit_insn (gen_rtx_SET (VOIDmode, target,
31235 gen_lowpart (mode, op0)));
31236 break;
31237
31238 default:
31239 gcc_unreachable ();
31240 }
31241 }
31242
31243 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
31244 all values variable, and none identical. */
31245
31246 static void
31247 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
31248 rtx target, rtx vals)
31249 {
31250 rtx ops[32], op0, op1;
31251 enum machine_mode half_mode = VOIDmode;
31252 int n, i;
31253
31254 switch (mode)
31255 {
31256 case V2SFmode:
31257 case V2SImode:
31258 if (!mmx_ok && !TARGET_SSE)
31259 break;
31260 /* FALLTHRU */
31261
31262 case V8SFmode:
31263 case V8SImode:
31264 case V4DFmode:
31265 case V4DImode:
31266 case V4SFmode:
31267 case V4SImode:
31268 case V2DFmode:
31269 case V2DImode:
31270 n = GET_MODE_NUNITS (mode);
31271 for (i = 0; i < n; i++)
31272 ops[i] = XVECEXP (vals, 0, i);
31273 ix86_expand_vector_init_concat (mode, target, ops, n);
31274 return;
31275
31276 case V32QImode:
31277 half_mode = V16QImode;
31278 goto half;
31279
31280 case V16HImode:
31281 half_mode = V8HImode;
31282 goto half;
31283
31284 half:
31285 n = GET_MODE_NUNITS (mode);
31286 for (i = 0; i < n; i++)
31287 ops[i] = XVECEXP (vals, 0, i);
31288 op0 = gen_reg_rtx (half_mode);
31289 op1 = gen_reg_rtx (half_mode);
31290 ix86_expand_vector_init_interleave (half_mode, op0, ops,
31291 n >> 2);
31292 ix86_expand_vector_init_interleave (half_mode, op1,
31293 &ops [n >> 1], n >> 2);
31294 emit_insn (gen_rtx_SET (VOIDmode, target,
31295 gen_rtx_VEC_CONCAT (mode, op0, op1)));
31296 return;
31297
31298 case V16QImode:
31299 if (!TARGET_SSE4_1)
31300 break;
31301 /* FALLTHRU */
31302
31303 case V8HImode:
31304 if (!TARGET_SSE2)
31305 break;
31306
31307 /* Don't use ix86_expand_vector_init_interleave if we can't
31308 move from GPR to SSE register directly. */
31309 if (!TARGET_INTER_UNIT_MOVES)
31310 break;
31311
31312 n = GET_MODE_NUNITS (mode);
31313 for (i = 0; i < n; i++)
31314 ops[i] = XVECEXP (vals, 0, i);
31315 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
31316 return;
31317
31318 case V4HImode:
31319 case V8QImode:
31320 break;
31321
31322 default:
31323 gcc_unreachable ();
31324 }
31325
31326 {
31327 int i, j, n_elts, n_words, n_elt_per_word;
31328 enum machine_mode inner_mode;
31329 rtx words[4], shift;
31330
31331 inner_mode = GET_MODE_INNER (mode);
31332 n_elts = GET_MODE_NUNITS (mode);
31333 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
31334 n_elt_per_word = n_elts / n_words;
31335 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
31336
31337 for (i = 0; i < n_words; ++i)
31338 {
31339 rtx word = NULL_RTX;
31340
31341 for (j = 0; j < n_elt_per_word; ++j)
31342 {
31343 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
31344 elt = convert_modes (word_mode, inner_mode, elt, true);
31345
31346 if (j == 0)
31347 word = elt;
31348 else
31349 {
31350 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
31351 word, 1, OPTAB_LIB_WIDEN);
31352 word = expand_simple_binop (word_mode, IOR, word, elt,
31353 word, 1, OPTAB_LIB_WIDEN);
31354 }
31355 }
31356
31357 words[i] = word;
31358 }
31359
31360 if (n_words == 1)
31361 emit_move_insn (target, gen_lowpart (mode, words[0]));
31362 else if (n_words == 2)
31363 {
31364 rtx tmp = gen_reg_rtx (mode);
31365 emit_clobber (tmp);
31366 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
31367 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
31368 emit_move_insn (target, tmp);
31369 }
31370 else if (n_words == 4)
31371 {
31372 rtx tmp = gen_reg_rtx (V4SImode);
31373 gcc_assert (word_mode == SImode);
31374 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
31375 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
31376 emit_move_insn (target, gen_lowpart (mode, tmp));
31377 }
31378 else
31379 gcc_unreachable ();
31380 }
31381 }
31382
31383 /* Initialize vector TARGET via VALS. Suppress the use of MMX
31384 instructions unless MMX_OK is true. */
31385
31386 void
31387 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
31388 {
31389 enum machine_mode mode = GET_MODE (target);
31390 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31391 int n_elts = GET_MODE_NUNITS (mode);
31392 int n_var = 0, one_var = -1;
31393 bool all_same = true, all_const_zero = true;
31394 int i;
31395 rtx x;
31396
31397 for (i = 0; i < n_elts; ++i)
31398 {
31399 x = XVECEXP (vals, 0, i);
31400 if (!(CONST_INT_P (x)
31401 || GET_CODE (x) == CONST_DOUBLE
31402 || GET_CODE (x) == CONST_FIXED))
31403 n_var++, one_var = i;
31404 else if (x != CONST0_RTX (inner_mode))
31405 all_const_zero = false;
31406 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31407 all_same = false;
31408 }
31409
31410 /* Constants are best loaded from the constant pool. */
31411 if (n_var == 0)
31412 {
31413 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31414 return;
31415 }
31416
31417 /* If all values are identical, broadcast the value. */
31418 if (all_same
31419 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31420 XVECEXP (vals, 0, 0)))
31421 return;
31422
31423 /* Values where only one field is non-constant are best loaded from
31424 the pool and overwritten via move later. */
31425 if (n_var == 1)
31426 {
31427 if (all_const_zero
31428 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31429 XVECEXP (vals, 0, one_var),
31430 one_var))
31431 return;
31432
31433 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31434 return;
31435 }
31436
31437 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31438 }
31439
31440 void
31441 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31442 {
31443 enum machine_mode mode = GET_MODE (target);
31444 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31445 enum machine_mode half_mode;
31446 bool use_vec_merge = false;
31447 rtx tmp;
31448 static rtx (*gen_extract[6][2]) (rtx, rtx)
31449 = {
31450 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31451 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31452 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31453 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31454 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31455 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31456 };
31457 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31458 = {
31459 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31460 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31461 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31462 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31463 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31464 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31465 };
31466 int i, j, n;
31467
31468 switch (mode)
31469 {
31470 case V2SFmode:
31471 case V2SImode:
31472 if (mmx_ok)
31473 {
31474 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31475 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31476 if (elt == 0)
31477 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31478 else
31479 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31480 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31481 return;
31482 }
31483 break;
31484
31485 case V2DImode:
31486 use_vec_merge = TARGET_SSE4_1;
31487 if (use_vec_merge)
31488 break;
31489
31490 case V2DFmode:
31491 {
31492 rtx op0, op1;
31493
31494 /* For the two element vectors, we implement a VEC_CONCAT with
31495 the extraction of the other element. */
31496
31497 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31498 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31499
31500 if (elt == 0)
31501 op0 = val, op1 = tmp;
31502 else
31503 op0 = tmp, op1 = val;
31504
31505 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31506 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31507 }
31508 return;
31509
31510 case V4SFmode:
31511 use_vec_merge = TARGET_SSE4_1;
31512 if (use_vec_merge)
31513 break;
31514
31515 switch (elt)
31516 {
31517 case 0:
31518 use_vec_merge = true;
31519 break;
31520
31521 case 1:
31522 /* tmp = target = A B C D */
31523 tmp = copy_to_reg (target);
31524 /* target = A A B B */
31525 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31526 /* target = X A B B */
31527 ix86_expand_vector_set (false, target, val, 0);
31528 /* target = A X C D */
31529 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31530 const1_rtx, const0_rtx,
31531 GEN_INT (2+4), GEN_INT (3+4)));
31532 return;
31533
31534 case 2:
31535 /* tmp = target = A B C D */
31536 tmp = copy_to_reg (target);
31537 /* tmp = X B C D */
31538 ix86_expand_vector_set (false, tmp, val, 0);
31539 /* target = A B X D */
31540 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31541 const0_rtx, const1_rtx,
31542 GEN_INT (0+4), GEN_INT (3+4)));
31543 return;
31544
31545 case 3:
31546 /* tmp = target = A B C D */
31547 tmp = copy_to_reg (target);
31548 /* tmp = X B C D */
31549 ix86_expand_vector_set (false, tmp, val, 0);
31550 	  /* target = A B C X */
31551 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31552 const0_rtx, const1_rtx,
31553 GEN_INT (2+4), GEN_INT (0+4)));
31554 return;
31555
31556 default:
31557 gcc_unreachable ();
31558 }
31559 break;
31560
31561 case V4SImode:
31562 use_vec_merge = TARGET_SSE4_1;
31563 if (use_vec_merge)
31564 break;
31565
31566 /* Element 0 handled by vec_merge below. */
31567 if (elt == 0)
31568 {
31569 use_vec_merge = true;
31570 break;
31571 }
31572
31573 if (TARGET_SSE2)
31574 {
31575 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31576 store into element 0, then shuffle them back. */
31577
31578 rtx order[4];
31579
31580 order[0] = GEN_INT (elt);
31581 order[1] = const1_rtx;
31582 order[2] = const2_rtx;
31583 order[3] = GEN_INT (3);
31584 order[elt] = const0_rtx;
31585
31586 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31587 order[1], order[2], order[3]));
31588
31589 ix86_expand_vector_set (false, target, val, 0);
31590
31591 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31592 order[1], order[2], order[3]));
31593 }
31594 else
31595 {
31596 /* For SSE1, we have to reuse the V4SF code. */
31597 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31598 gen_lowpart (SFmode, val), elt);
31599 }
31600 return;
31601
31602 case V8HImode:
31603 use_vec_merge = TARGET_SSE2;
31604 break;
31605 case V4HImode:
31606 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31607 break;
31608
31609 case V16QImode:
31610 use_vec_merge = TARGET_SSE4_1;
31611 break;
31612
31613 case V8QImode:
31614 break;
31615
31616 case V32QImode:
31617 half_mode = V16QImode;
31618 j = 0;
31619 n = 16;
31620 goto half;
31621
31622 case V16HImode:
31623 half_mode = V8HImode;
31624 j = 1;
31625 n = 8;
31626 goto half;
31627
31628 case V8SImode:
31629 half_mode = V4SImode;
31630 j = 2;
31631 n = 4;
31632 goto half;
31633
31634 case V4DImode:
31635 half_mode = V2DImode;
31636 j = 3;
31637 n = 2;
31638 goto half;
31639
31640 case V8SFmode:
31641 half_mode = V4SFmode;
31642 j = 4;
31643 n = 4;
31644 goto half;
31645
31646 case V4DFmode:
31647 half_mode = V2DFmode;
31648 j = 5;
31649 n = 2;
31650 goto half;
31651
31652 half:
31653 /* Compute offset. */
31654 i = elt / n;
31655 elt %= n;
31656
31657 gcc_assert (i <= 1);
31658
31659 /* Extract the half. */
31660 tmp = gen_reg_rtx (half_mode);
31661 emit_insn (gen_extract[j][i] (tmp, target));
31662
31663 /* Put val in tmp at elt. */
31664 ix86_expand_vector_set (false, tmp, val, elt);
31665
31666 /* Put it back. */
31667 emit_insn (gen_insert[j][i] (target, target, tmp));
31668 return;
31669
31670 default:
31671 break;
31672 }
31673
31674 if (use_vec_merge)
31675 {
31676 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31677 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31678 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31679 }
31680 else
31681 {
31682 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31683
31684 emit_move_insn (mem, target);
31685
31686 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31687 emit_move_insn (tmp, val);
31688
31689 emit_move_insn (target, mem);
31690 }
31691 }
31692
31693 void
31694 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31695 {
31696 enum machine_mode mode = GET_MODE (vec);
31697 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31698 bool use_vec_extr = false;
31699 rtx tmp;
31700
31701 switch (mode)
31702 {
31703 case V2SImode:
31704 case V2SFmode:
31705 if (!mmx_ok)
31706 break;
31707 /* FALLTHRU */
31708
31709 case V2DFmode:
31710 case V2DImode:
31711 use_vec_extr = true;
31712 break;
31713
31714 case V4SFmode:
31715 use_vec_extr = TARGET_SSE4_1;
31716 if (use_vec_extr)
31717 break;
31718
31719 switch (elt)
31720 {
31721 case 0:
31722 tmp = vec;
31723 break;
31724
31725 case 1:
31726 case 3:
31727 tmp = gen_reg_rtx (mode);
31728 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31729 GEN_INT (elt), GEN_INT (elt),
31730 GEN_INT (elt+4), GEN_INT (elt+4)));
31731 break;
31732
31733 case 2:
31734 tmp = gen_reg_rtx (mode);
31735 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31736 break;
31737
31738 default:
31739 gcc_unreachable ();
31740 }
31741 vec = tmp;
31742 use_vec_extr = true;
31743 elt = 0;
31744 break;
31745
31746 case V4SImode:
31747 use_vec_extr = TARGET_SSE4_1;
31748 if (use_vec_extr)
31749 break;
31750
31751 if (TARGET_SSE2)
31752 {
31753 switch (elt)
31754 {
31755 case 0:
31756 tmp = vec;
31757 break;
31758
31759 case 1:
31760 case 3:
31761 tmp = gen_reg_rtx (mode);
31762 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31763 GEN_INT (elt), GEN_INT (elt),
31764 GEN_INT (elt), GEN_INT (elt)));
31765 break;
31766
31767 case 2:
31768 tmp = gen_reg_rtx (mode);
31769 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31770 break;
31771
31772 default:
31773 gcc_unreachable ();
31774 }
31775 vec = tmp;
31776 use_vec_extr = true;
31777 elt = 0;
31778 }
31779 else
31780 {
31781 /* For SSE1, we have to reuse the V4SF code. */
31782 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31783 gen_lowpart (V4SFmode, vec), elt);
31784 return;
31785 }
31786 break;
31787
31788 case V8HImode:
31789 use_vec_extr = TARGET_SSE2;
31790 break;
31791 case V4HImode:
31792 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31793 break;
31794
31795 case V16QImode:
31796 use_vec_extr = TARGET_SSE4_1;
31797 break;
31798
31799 case V8QImode:
31800 /* ??? Could extract the appropriate HImode element and shift. */
31801 default:
31802 break;
31803 }
31804
31805 if (use_vec_extr)
31806 {
31807 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31808 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31809
31810 /* Let the rtl optimizers know about the zero extension performed. */
31811 if (inner_mode == QImode || inner_mode == HImode)
31812 {
31813 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31814 target = gen_lowpart (SImode, target);
31815 }
31816
31817 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31818 }
31819 else
31820 {
31821 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31822
31823 emit_move_insn (mem, vec);
31824
31825 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31826 emit_move_insn (target, tmp);
31827 }
31828 }
31829
31830 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31831 pattern to reduce; DEST is the destination; IN is the input vector. */
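/* With IN = { a, b, c, d }, movhlps produces { c, d, c, d }, the first
   FN reduces that against IN leaving partial results in elements 0 and
   1, a shufps broadcasts element 1, and the final FN combines the two
   partial results into DEST.  */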
31832
31833 void
31834 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31835 {
31836 rtx tmp1, tmp2, tmp3;
31837
31838 tmp1 = gen_reg_rtx (V4SFmode);
31839 tmp2 = gen_reg_rtx (V4SFmode);
31840 tmp3 = gen_reg_rtx (V4SFmode);
31841
31842 emit_insn (gen_sse_movhlps (tmp1, in, in));
31843 emit_insn (fn (tmp2, tmp1, in));
31844
31845 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31846 const1_rtx, const1_rtx,
31847 GEN_INT (1+4), GEN_INT (1+4)));
31848 emit_insn (fn (dest, tmp2, tmp3));
31849 }
31850 \f
31851 /* Target hook for scalar_mode_supported_p. */
31852 static bool
31853 ix86_scalar_mode_supported_p (enum machine_mode mode)
31854 {
31855 if (DECIMAL_FLOAT_MODE_P (mode))
31856 return default_decimal_float_supported_p ();
31857 else if (mode == TFmode)
31858 return true;
31859 else
31860 return default_scalar_mode_supported_p (mode);
31861 }
31862
31863 /* Implements target hook vector_mode_supported_p. */
31864 static bool
31865 ix86_vector_mode_supported_p (enum machine_mode mode)
31866 {
31867 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31868 return true;
31869 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31870 return true;
31871 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31872 return true;
31873 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31874 return true;
31875 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31876 return true;
31877 return false;
31878 }
31879
31880 /* Target hook for c_mode_for_suffix. */
31881 static enum machine_mode
31882 ix86_c_mode_for_suffix (char suffix)
31883 {
31884 if (suffix == 'q')
31885 return TFmode;
31886 if (suffix == 'w')
31887 return XFmode;
31888
31889 return VOIDmode;
31890 }
31891
31892 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31893
31894 We do this in the new i386 backend to maintain source compatibility
31895 with the old cc0-based compiler. */
31896
31897 static tree
31898 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31899 tree inputs ATTRIBUTE_UNUSED,
31900 tree clobbers)
31901 {
31902 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31903 clobbers);
31904 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31905 clobbers);
31906 return clobbers;
31907 }
31908
31909 /* Implements the targetm.asm.encode_section_info target hook.  This
31910    is not used by NetWare.  */
31911
31912 static void ATTRIBUTE_UNUSED
31913 ix86_encode_section_info (tree decl, rtx rtl, int first)
31914 {
31915 default_encode_section_info (decl, rtl, first);
31916
31917 if (TREE_CODE (decl) == VAR_DECL
31918 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31919 && ix86_in_large_data_p (decl))
31920 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31921 }
31922
31923 /* Worker function for REVERSE_CONDITION. */
31924
31925 enum rtx_code
31926 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31927 {
31928 return (mode != CCFPmode && mode != CCFPUmode
31929 ? reverse_condition (code)
31930 : reverse_condition_maybe_unordered (code));
31931 }
31932
31933 /* Output code to perform an x87 FP register move, from OPERANDS[1]
31934 to OPERANDS[0]. */
31935
31936 const char *
31937 output_387_reg_move (rtx insn, rtx *operands)
31938 {
31939 if (REG_P (operands[0]))
31940 {
31941 if (REG_P (operands[1])
31942 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31943 {
31944 if (REGNO (operands[0]) == FIRST_STACK_REG)
31945 return output_387_ffreep (operands, 0);
31946 return "fstp\t%y0";
31947 }
31948 if (STACK_TOP_P (operands[0]))
31949 return "fld%Z1\t%y1";
31950 return "fst\t%y0";
31951 }
31952 else if (MEM_P (operands[0]))
31953 {
31954 gcc_assert (REG_P (operands[1]));
31955 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31956 return "fstp%Z0\t%y0";
31957 else
31958 {
31959 /* There is no non-popping store to memory for XFmode.
31960 So if we need one, follow the store with a load. */
31961 if (GET_MODE (operands[0]) == XFmode)
31962 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31963 else
31964 return "fst%Z0\t%y0";
31965 }
31966 }
31967 else
31968 gcc_unreachable();
31969 }
31970
31971 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
31972    the FP status register is set.  */
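/* The status word is fetched with fnstsw; C2 is bit 10 of that word,
   i.e. bit 2 of its high byte, which is presumably why the non-SAHF
   path below tests 0x04 against the upper eight bits.  */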
31973
31974 void
31975 ix86_emit_fp_unordered_jump (rtx label)
31976 {
31977 rtx reg = gen_reg_rtx (HImode);
31978 rtx temp;
31979
31980 emit_insn (gen_x86_fnstsw_1 (reg));
31981
31982 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31983 {
31984 emit_insn (gen_x86_sahf_1 (reg));
31985
31986 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31987 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31988 }
31989 else
31990 {
31991 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31992
31993 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31994 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31995 }
31996
31997 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31998 gen_rtx_LABEL_REF (VOIDmode, label),
31999 pc_rtx);
32000 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
32001
32002 emit_jump_insn (temp);
32003 predict_jump (REG_BR_PROB_BASE * 10 / 100);
32004 }
32005
32006 /* Output code to perform a log1p XFmode calculation. */
32007
32008 void ix86_emit_i387_log1p (rtx op0, rtx op1)
32009 {
32010 rtx label1 = gen_label_rtx ();
32011 rtx label2 = gen_label_rtx ();
32012
32013 rtx tmp = gen_reg_rtx (XFmode);
32014 rtx tmp2 = gen_reg_rtx (XFmode);
32015 rtx test;
32016
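  /* fyl2xp1 is only specified for arguments with |x| < 1 - sqrt(2)/2,
     which is the constant below; larger arguments take the fyl2x path
     on 1.0 + op1 instead.  */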
32017 emit_insn (gen_absxf2 (tmp, op1));
32018 test = gen_rtx_GE (VOIDmode, tmp,
32019 CONST_DOUBLE_FROM_REAL_VALUE (
32020 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
32021 XFmode));
32022 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
32023
32024 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
32025 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
32026 emit_jump (label2);
32027
32028 emit_label (label1);
32029 emit_move_insn (tmp, CONST1_RTX (XFmode));
32030 emit_insn (gen_addxf3 (tmp, op1, tmp));
32031 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
32032 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
32033
32034 emit_label (label2);
32035 }
32036
32037 /* Output code to perform a Newton-Raphson approximation of a single precision
32038    floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
32039
32040 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
32041 {
32042 rtx x0, x1, e0, e1;
32043
32044 x0 = gen_reg_rtx (mode);
32045 e0 = gen_reg_rtx (mode);
32046 e1 = gen_reg_rtx (mode);
32047 x1 = gen_reg_rtx (mode);
32048
32049 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
32050
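  /* This is the Newton-Raphson step for the reciprocal: with x0 an
     estimate of 1/b, x1 = x0 * (2 - b*x0) = 2*x0 - b*x0*x0, computed
     below as e1 - e0 where e1 = x0 + x0 and e0 = b * x0 * x0.  */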
32051 /* x0 = rcp(b) estimate */
32052 emit_insn (gen_rtx_SET (VOIDmode, x0,
32053 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
32054 UNSPEC_RCP)));
32055 /* e0 = x0 * b */
32056 emit_insn (gen_rtx_SET (VOIDmode, e0,
32057 gen_rtx_MULT (mode, x0, b)));
32058
32059 /* e0 = x0 * e0 */
32060 emit_insn (gen_rtx_SET (VOIDmode, e0,
32061 gen_rtx_MULT (mode, x0, e0)));
32062
32063 /* e1 = x0 + x0 */
32064 emit_insn (gen_rtx_SET (VOIDmode, e1,
32065 gen_rtx_PLUS (mode, x0, x0)));
32066
32067 /* x1 = e1 - e0 */
32068 emit_insn (gen_rtx_SET (VOIDmode, x1,
32069 gen_rtx_MINUS (mode, e1, e0)));
32070
32071 /* res = a * x1 */
32072 emit_insn (gen_rtx_SET (VOIDmode, res,
32073 gen_rtx_MULT (mode, a, x1)));
32074 }
32075
32076 /* Output code to perform a Newton-Raphson approximation of a
32077    single precision floating point [reciprocal] square root.  */
32078
32079 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
32080 bool recip)
32081 {
32082 rtx x0, e0, e1, e2, e3, mthree, mhalf;
32083 REAL_VALUE_TYPE r;
32084
32085 x0 = gen_reg_rtx (mode);
32086 e0 = gen_reg_rtx (mode);
32087 e1 = gen_reg_rtx (mode);
32088 e2 = gen_reg_rtx (mode);
32089 e3 = gen_reg_rtx (mode);
32090
32091 real_from_integer (&r, VOIDmode, -3, -1, 0);
32092 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
32093
32094 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
32095 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
32096
32097 if (VECTOR_MODE_P (mode))
32098 {
32099 mthree = ix86_build_const_vector (mode, true, mthree);
32100 mhalf = ix86_build_const_vector (mode, true, mhalf);
32101 }
32102
32103 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
32104 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
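  /* These follow from the Newton-Raphson step for 1/sqrt(a): with x0 an
     estimate, x1 = x0 * (3 - a*x0*x0) / 2, rewritten as
     -0.5 * x0 * (a*x0*x0 - 3) so that only the -3 and -0.5 constants
     are needed.  */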
32105
32106 /* x0 = rsqrt(a) estimate */
32107 emit_insn (gen_rtx_SET (VOIDmode, x0,
32108 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
32109 UNSPEC_RSQRT)));
32110
32111   /* If a == 0.0, filter out the infinity to prevent a NaN for sqrt(0.0).  */
32112 if (!recip)
32113 {
32114 rtx zero, mask;
32115
32116 zero = gen_reg_rtx (mode);
32117 mask = gen_reg_rtx (mode);
32118
32119 zero = force_reg (mode, CONST0_RTX(mode));
32120 emit_insn (gen_rtx_SET (VOIDmode, mask,
32121 gen_rtx_NE (mode, zero, a)));
32122
32123 emit_insn (gen_rtx_SET (VOIDmode, x0,
32124 gen_rtx_AND (mode, x0, mask)));
32125 }
32126
32127 /* e0 = x0 * a */
32128 emit_insn (gen_rtx_SET (VOIDmode, e0,
32129 gen_rtx_MULT (mode, x0, a)));
32130 /* e1 = e0 * x0 */
32131 emit_insn (gen_rtx_SET (VOIDmode, e1,
32132 gen_rtx_MULT (mode, e0, x0)));
32133
32134 /* e2 = e1 - 3. */
32135 mthree = force_reg (mode, mthree);
32136 emit_insn (gen_rtx_SET (VOIDmode, e2,
32137 gen_rtx_PLUS (mode, e1, mthree)));
32138
32139 mhalf = force_reg (mode, mhalf);
32140 if (recip)
32141 /* e3 = -.5 * x0 */
32142 emit_insn (gen_rtx_SET (VOIDmode, e3,
32143 gen_rtx_MULT (mode, x0, mhalf)));
32144 else
32145 /* e3 = -.5 * e0 */
32146 emit_insn (gen_rtx_SET (VOIDmode, e3,
32147 gen_rtx_MULT (mode, e0, mhalf)));
32148 /* ret = e2 * e3 */
32149 emit_insn (gen_rtx_SET (VOIDmode, res,
32150 gen_rtx_MULT (mode, e2, e3)));
32151 }
32152
32153 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
32154
32155 static void ATTRIBUTE_UNUSED
32156 i386_solaris_elf_named_section (const char *name, unsigned int flags,
32157 tree decl)
32158 {
32159 /* With Binutils 2.15, the "@unwind" marker must be specified on
32160 every occurrence of the ".eh_frame" section, not just the first
32161 one. */
32162 if (TARGET_64BIT
32163 && strcmp (name, ".eh_frame") == 0)
32164 {
32165 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
32166 flags & SECTION_WRITE ? "aw" : "a");
32167 return;
32168 }
32169 default_elf_asm_named_section (name, flags, decl);
32170 }
32171
32172 /* Return the mangling of TYPE if it is an extended fundamental type. */
32173
32174 static const char *
32175 ix86_mangle_type (const_tree type)
32176 {
32177 type = TYPE_MAIN_VARIANT (type);
32178
32179 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32180 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32181 return NULL;
32182
32183 switch (TYPE_MODE (type))
32184 {
32185 case TFmode:
32186 /* __float128 is "g". */
32187 return "g";
32188 case XFmode:
32189 /* "long double" or __float80 is "e". */
32190 return "e";
32191 default:
32192 return NULL;
32193 }
32194 }
32195
32196 /* For 32-bit code we can save PIC register setup by using
32197 __stack_chk_fail_local hidden function instead of calling
32198 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
32199 register, so it is better to call __stack_chk_fail directly. */
32200
32201 static tree
32202 ix86_stack_protect_fail (void)
32203 {
32204 return TARGET_64BIT
32205 ? default_external_stack_protect_fail ()
32206 : default_hidden_stack_protect_fail ();
32207 }
32208
32209 /* Select a format to encode pointers in exception handling data. CODE
32210 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
32211 true if the symbol may be affected by dynamic relocations.
32212
32213 ??? All x86 object file formats are capable of representing this.
32214 After all, the relocation needed is the same as for the call insn.
32215 Whether or not a particular assembler allows us to enter such, I
32216 guess we'll have to see. */
32217 int
32218 asm_preferred_eh_data_format (int code, int global)
32219 {
32220 if (flag_pic)
32221 {
32222 int type = DW_EH_PE_sdata8;
32223 if (!TARGET_64BIT
32224 || ix86_cmodel == CM_SMALL_PIC
32225 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
32226 type = DW_EH_PE_sdata4;
32227 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
32228 }
32229 if (ix86_cmodel == CM_SMALL
32230 || (ix86_cmodel == CM_MEDIUM && code))
32231 return DW_EH_PE_udata4;
32232 return DW_EH_PE_absptr;
32233 }
32234 \f
32235 /* Expand copysign from SIGN to the positive value ABS_VALUE,
32236 storing the result in RESULT.  If MASK is non-null, it must be a mask
32237 with all bits set except the sign bit (the mask used to clear the sign bit).  */
32238 static void
32239 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
32240 {
32241 enum machine_mode mode = GET_MODE (sign);
32242 rtx sgn = gen_reg_rtx (mode);
32243 if (mask == NULL_RTX)
32244 {
32245 enum machine_mode vmode;
32246
32247 if (mode == SFmode)
32248 vmode = V4SFmode;
32249 else if (mode == DFmode)
32250 vmode = V2DFmode;
32251 else
32252 vmode = mode;
32253
32254 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
32255 if (!VECTOR_MODE_P (mode))
32256 {
32257 /* We need to generate a scalar mode mask in this case. */
32258 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32259 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32260 mask = gen_reg_rtx (mode);
32261 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32262 }
32263 }
32264 else
32265 mask = gen_rtx_NOT (mode, mask);
32266 emit_insn (gen_rtx_SET (VOIDmode, sgn,
32267 gen_rtx_AND (mode, mask, sign)));
32268 emit_insn (gen_rtx_SET (VOIDmode, result,
32269 gen_rtx_IOR (mode, abs_value, sgn)));
32270 }
32271
32272 /* Expand fabs (OP0) and return a new rtx that holds the result. The
32273 mask for masking out the sign-bit is stored in *SMASK, if that is
32274 non-null. */
32275 static rtx
32276 ix86_expand_sse_fabs (rtx op0, rtx *smask)
32277 {
32278 enum machine_mode vmode, mode = GET_MODE (op0);
32279 rtx xa, mask;
32280
32281 xa = gen_reg_rtx (mode);
32282 if (mode == SFmode)
32283 vmode = V4SFmode;
32284 else if (mode == DFmode)
32285 vmode = V2DFmode;
32286 else
32287 vmode = mode;
32288 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
32289 if (!VECTOR_MODE_P (mode))
32290 {
32291 /* We need to generate a scalar mode mask in this case. */
32292 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32293 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32294 mask = gen_reg_rtx (mode);
32295 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32296 }
32297 emit_insn (gen_rtx_SET (VOIDmode, xa,
32298 gen_rtx_AND (mode, op0, mask)));
32299
32300 if (smask)
32301 *smask = mask;
32302
32303 return xa;
32304 }
32305
32306 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
32307 swapping the operands if SWAP_OPERANDS is true. The expanded
32308 code is a forward jump to a newly created label in case the
32309 comparison is true. The generated label rtx is returned. */
32310 static rtx
32311 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
32312 bool swap_operands)
32313 {
32314 rtx label, tmp;
32315
32316 if (swap_operands)
32317 {
32318 tmp = op0;
32319 op0 = op1;
32320 op1 = tmp;
32321 }
32322
32323 label = gen_label_rtx ();
32324 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
32325 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32326 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
32327 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
32328 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
32329 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
32330 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32331 JUMP_LABEL (tmp) = label;
32332
32333 return label;
32334 }
32335
32336 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
32337 using comparison code CODE. Operands are swapped for the comparison if
32338 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
32339 static rtx
32340 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
32341 bool swap_operands)
32342 {
32343 rtx (*insn)(rtx, rtx, rtx, rtx);
32344 enum machine_mode mode = GET_MODE (op0);
32345 rtx mask = gen_reg_rtx (mode);
32346
32347 if (swap_operands)
32348 {
32349 rtx tmp = op0;
32350 op0 = op1;
32351 op1 = tmp;
32352 }
32353
32354 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
32355
32356 emit_insn (insn (mask, op0, op1,
32357 gen_rtx_fmt_ee (code, mode, op0, op1)));
32358 return mask;
32359 }
32360
32361 /* Generate and return a rtx of mode MODE for 2**n where n is the number
32362 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
32363 static rtx
32364 ix86_gen_TWO52 (enum machine_mode mode)
32365 {
32366 REAL_VALUE_TYPE TWO52r;
32367 rtx TWO52;
32368
32369 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
32370 TWO52 = const_double_from_real_value (TWO52r, mode);
32371 TWO52 = force_reg (mode, TWO52);
32372
32373 return TWO52;
32374 }
32375
32376 /* Expand SSE sequence for computing lround from OP1 storing
32377 into OP0. */
32378 void
32379 ix86_expand_lround (rtx op0, rtx op1)
32380 {
32381 /* C code for the stuff we're doing below:
32382 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
32383 return (long)tmp;
32384 */
32385 enum machine_mode mode = GET_MODE (op1);
32386 const struct real_format *fmt;
32387 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32388 rtx adj;
32389
32390 /* load nextafter (0.5, 0.0) */
32391 fmt = REAL_MODE_FORMAT (mode);
32392 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32393 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
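/* pred_half = 0.5 - 2**(-p-1) is the largest representable value below 0.5;
   using it instead of 0.5 keeps op1 + adj from rounding up across an integer
   boundary when the fractional part of op1 is just under one half.  */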
32394
32395 /* adj = copysign (0.5, op1) */
32396 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32397 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32398
32399 /* adj = op1 + adj */
32400 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32401
32402 /* op0 = (imode)adj */
32403 expand_fix (op0, adj, 0);
32404 }
32405
32406 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
32407 into OP0.  */
32408 void
32409 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
32410 {
32411 /* C code for the stuff we're doing below (for do_floor):
32412 xi = (long)op1;
32413 xi -= (double)xi > op1 ? 1 : 0;
32414 return xi;
32415 */
32416 enum machine_mode fmode = GET_MODE (op1);
32417 enum machine_mode imode = GET_MODE (op0);
32418 rtx ireg, freg, label, tmp;
32419
32420 /* reg = (long)op1 */
32421 ireg = gen_reg_rtx (imode);
32422 expand_fix (ireg, op1, 0);
32423
32424 /* freg = (double)reg */
32425 freg = gen_reg_rtx (fmode);
32426 expand_float (freg, ireg, 0);
32427
32428 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32429 label = ix86_expand_sse_compare_and_jump (UNLE,
32430 freg, op1, !do_floor);
32431 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32432 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32433 emit_move_insn (ireg, tmp);
32434
32435 emit_label (label);
32436 LABEL_NUSES (label) = 1;
32437
32438 emit_move_insn (op0, ireg);
32439 }
32440
32441 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32442 result in OPERAND0. */
32443 void
32444 ix86_expand_rint (rtx operand0, rtx operand1)
32445 {
32446 /* C code for the stuff we're doing below:
32447 xa = fabs (operand1);
32448 if (!isless (xa, 2**52))
32449 return operand1;
32450 xa = xa + 2**52 - 2**52;
32451 return copysign (xa, operand1);
32452 */
32453 enum machine_mode mode = GET_MODE (operand0);
32454 rtx res, xa, label, TWO52, mask;
32455
32456 res = gen_reg_rtx (mode);
32457 emit_move_insn (res, operand1);
32458
32459 /* xa = abs (operand1) */
32460 xa = ix86_expand_sse_fabs (res, &mask);
32461
32462 /* if (!isless (xa, TWO52)) goto label; */
32463 TWO52 = ix86_gen_TWO52 (mode);
32464 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32465
32466 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32467 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
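/* Adding and then subtracting 2**52 (2**23 for SFmode) rounds xa to an
   integer in the current rounding mode, because every value >= 2**52
   (resp. 2**23) is already an integer: e.g. 3.7 + 2**52 rounds to
   4.0 + 2**52, and subtracting 2**52 leaves 4.0.  */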
32468
32469 ix86_sse_copysign_to_positive (res, xa, res, mask);
32470
32471 emit_label (label);
32472 LABEL_NUSES (label) = 1;
32473
32474 emit_move_insn (operand0, res);
32475 }
32476
32477 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32478 into OPERAND0 without relying on the 64-bit-only cvttsd2siq truncation.  */
32479 void
32480 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32481 {
32482 /* C code for the stuff we expand below.
32483 double xa = fabs (x), x2;
32484 if (!isless (xa, TWO52))
32485 return x;
32486 xa = xa + TWO52 - TWO52;
32487 x2 = copysign (xa, x);
32488 Compensate. Floor:
32489 if (x2 > x)
32490 x2 -= 1;
32491 Compensate. Ceil:
32492 if (x2 < x)
32493 x2 -= -1;
32494 return x2;
32495 */
32496 enum machine_mode mode = GET_MODE (operand0);
32497 rtx xa, TWO52, tmp, label, one, res, mask;
32498
32499 TWO52 = ix86_gen_TWO52 (mode);
32500
32501 /* Temporary for holding the result, initialized to the input
32502 operand to ease control flow. */
32503 res = gen_reg_rtx (mode);
32504 emit_move_insn (res, operand1);
32505
32506 /* xa = abs (operand1) */
32507 xa = ix86_expand_sse_fabs (res, &mask);
32508
32509 /* if (!isless (xa, TWO52)) goto label; */
32510 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32511
32512 /* xa = xa + TWO52 - TWO52; */
32513 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32514 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32515
32516 /* xa = copysign (xa, operand1) */
32517 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32518
32519 /* generate 1.0 or -1.0 */
32520 one = force_reg (mode,
32521 const_double_from_real_value (do_floor
32522 ? dconst1 : dconstm1, mode));
32523
32524 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32525 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32526 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32527 gen_rtx_AND (mode, one, tmp)));
32528 /* We always need to subtract here to preserve signed zero. */
32529 tmp = expand_simple_binop (mode, MINUS,
32530 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32531 emit_move_insn (res, tmp);
32532
32533 emit_label (label);
32534 LABEL_NUSES (label) = 1;
32535
32536 emit_move_insn (operand0, res);
32537 }
32538
32539 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32540 into OPERAND0. */
32541 void
32542 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32543 {
32544 /* C code for the stuff we expand below.
32545 double xa = fabs (x), x2;
32546 if (!isless (xa, TWO52))
32547 return x;
32548 x2 = (double)(long)x;
32549 Compensate. Floor:
32550 if (x2 > x)
32551 x2 -= 1;
32552 Compensate. Ceil:
32553 if (x2 < x)
32554 x2 += 1;
32555 if (HONOR_SIGNED_ZEROS (mode))
32556 return copysign (x2, x);
32557 return x2;
32558 */
32559 enum machine_mode mode = GET_MODE (operand0);
32560 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32561
32562 TWO52 = ix86_gen_TWO52 (mode);
32563
32564 /* Temporary for holding the result, initialized to the input
32565 operand to ease control flow. */
32566 res = gen_reg_rtx (mode);
32567 emit_move_insn (res, operand1);
32568
32569 /* xa = abs (operand1) */
32570 xa = ix86_expand_sse_fabs (res, &mask);
32571
32572 /* if (!isless (xa, TWO52)) goto label; */
32573 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32574
32575 /* xa = (double)(long)x */
32576 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32577 expand_fix (xi, res, 0);
32578 expand_float (xa, xi, 0);
32579
32580 /* generate 1.0 */
32581 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32582
32583 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32584 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32585 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32586 gen_rtx_AND (mode, one, tmp)));
32587 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32588 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32589 emit_move_insn (res, tmp);
32590
32591 if (HONOR_SIGNED_ZEROS (mode))
32592 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32593
32594 emit_label (label);
32595 LABEL_NUSES (label) = 1;
32596
32597 emit_move_insn (operand0, res);
32598 }
32599
32600 /* Expand SSE sequence for computing round from OPERAND1 storing
32601 into OPERAND0.  The sequence works without relying on DImode truncation
32602 via cvttsd2siq, which is only available on 64-bit targets.  */
32603 void
32604 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32605 {
32606 /* C code for the stuff we expand below.
32607 double xa = fabs (x), xa2, x2;
32608 if (!isless (xa, TWO52))
32609 return x;
32610 Using the absolute value and copying back sign makes
32611 -0.0 -> -0.0 correct.
32612 xa2 = xa + TWO52 - TWO52;
32613 Compensate.
32614 dxa = xa2 - xa;
32615 if (dxa <= -0.5)
32616 xa2 += 1;
32617 else if (dxa > 0.5)
32618 xa2 -= 1;
32619 x2 = copysign (xa2, x);
32620 return x2;
32621 */
32622 enum machine_mode mode = GET_MODE (operand0);
32623 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32624
32625 TWO52 = ix86_gen_TWO52 (mode);
32626
32627 /* Temporary for holding the result, initialized to the input
32628 operand to ease control flow. */
32629 res = gen_reg_rtx (mode);
32630 emit_move_insn (res, operand1);
32631
32632 /* xa = abs (operand1) */
32633 xa = ix86_expand_sse_fabs (res, &mask);
32634
32635 /* if (!isless (xa, TWO52)) goto label; */
32636 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32637
32638 /* xa2 = xa + TWO52 - TWO52; */
32639 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32640 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32641
32642 /* dxa = xa2 - xa; */
32643 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32644
32645 /* generate 0.5, 1.0 and -0.5 */
32646 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32647 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32648 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32649 0, OPTAB_DIRECT);
32650
32651 /* Compensate. */
32652 tmp = gen_reg_rtx (mode);
32653 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32654 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32655 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32656 gen_rtx_AND (mode, one, tmp)));
32657 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32658 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32659 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32660 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32661 gen_rtx_AND (mode, one, tmp)));
32662 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32663
32664 /* res = copysign (xa2, operand1) */
32665 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32666
32667 emit_label (label);
32668 LABEL_NUSES (label) = 1;
32669
32670 emit_move_insn (operand0, res);
32671 }
32672
32673 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32674 into OPERAND0. */
32675 void
32676 ix86_expand_trunc (rtx operand0, rtx operand1)
32677 {
32678 /* C code for SSE variant we expand below.
32679 double xa = fabs (x), x2;
32680 if (!isless (xa, TWO52))
32681 return x;
32682 x2 = (double)(long)x;
32683 if (HONOR_SIGNED_ZEROS (mode))
32684 return copysign (x2, x);
32685 return x2;
32686 */
32687 enum machine_mode mode = GET_MODE (operand0);
32688 rtx xa, xi, TWO52, label, res, mask;
32689
32690 TWO52 = ix86_gen_TWO52 (mode);
32691
32692 /* Temporary for holding the result, initialized to the input
32693 operand to ease control flow. */
32694 res = gen_reg_rtx (mode);
32695 emit_move_insn (res, operand1);
32696
32697 /* xa = abs (operand1) */
32698 xa = ix86_expand_sse_fabs (res, &mask);
32699
32700 /* if (!isless (xa, TWO52)) goto label; */
32701 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32702
32703 /* x = (double)(long)x */
32704 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32705 expand_fix (xi, res, 0);
32706 expand_float (res, xi, 0);
32707
32708 if (HONOR_SIGNED_ZEROS (mode))
32709 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32710
32711 emit_label (label);
32712 LABEL_NUSES (label) = 1;
32713
32714 emit_move_insn (operand0, res);
32715 }
32716
32717 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32718 into OPERAND0 without relying on the 64-bit-only cvttsd2siq truncation.  */
32719 void
32720 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32721 {
32722 enum machine_mode mode = GET_MODE (operand0);
32723 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32724
32725 /* C code for SSE variant we expand below.
32726 double xa = fabs (x), xa2, x2;
32727 if (!isless (xa, TWO52))
32728 return x;
32729 xa2 = xa + TWO52 - TWO52;
32730 Compensate:
32731 if (xa2 > xa)
32732 xa2 -= 1.0;
32733 x2 = copysign (xa2, x);
32734 return x2;
32735 */
32736
32737 TWO52 = ix86_gen_TWO52 (mode);
32738
32739 /* Temporary for holding the result, initialized to the input
32740 operand to ease control flow. */
32741 res = gen_reg_rtx (mode);
32742 emit_move_insn (res, operand1);
32743
32744 /* xa = abs (operand1) */
32745 xa = ix86_expand_sse_fabs (res, &smask);
32746
32747 /* if (!isless (xa, TWO52)) goto label; */
32748 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32749
32750 /* res = xa + TWO52 - TWO52; */
32751 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32752 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32753 emit_move_insn (res, tmp);
32754
32755 /* generate 1.0 */
32756 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32757
32758 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32759 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32760 emit_insn (gen_rtx_SET (VOIDmode, mask,
32761 gen_rtx_AND (mode, mask, one)));
32762 tmp = expand_simple_binop (mode, MINUS,
32763 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32764 emit_move_insn (res, tmp);
32765
32766 /* res = copysign (res, operand1) */
32767 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32768
32769 emit_label (label);
32770 LABEL_NUSES (label) = 1;
32771
32772 emit_move_insn (operand0, res);
32773 }
32774
32775 /* Expand SSE sequence for computing round from OPERAND1 storing
32776 into OPERAND0. */
32777 void
32778 ix86_expand_round (rtx operand0, rtx operand1)
32779 {
32780 /* C code for the stuff we're doing below:
32781 double xa = fabs (x);
32782 if (!isless (xa, TWO52))
32783 return x;
32784 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32785 return copysign (xa, x);
32786 */
32787 enum machine_mode mode = GET_MODE (operand0);
32788 rtx res, TWO52, xa, label, xi, half, mask;
32789 const struct real_format *fmt;
32790 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32791
32792 /* Temporary for holding the result, initialized to the input
32793 operand to ease control flow. */
32794 res = gen_reg_rtx (mode);
32795 emit_move_insn (res, operand1);
32796
32797 TWO52 = ix86_gen_TWO52 (mode);
32798 xa = ix86_expand_sse_fabs (res, &mask);
32799 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32800
32801 /* load nextafter (0.5, 0.0) */
32802 fmt = REAL_MODE_FORMAT (mode);
32803 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32804 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
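/* As in ix86_expand_lround, 0.5 - 2**(-p-1) is used instead of 0.5 so that
   the addition below cannot round xa up across an integer boundary.  */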
32805
32806 /* xa = xa + 0.5 */
32807 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32808 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32809
32810 /* xa = (double)(int64_t)xa */
32811 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32812 expand_fix (xi, xa, 0);
32813 expand_float (xa, xi, 0);
32814
32815 /* res = copysign (xa, operand1) */
32816 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32817
32818 emit_label (label);
32819 LABEL_NUSES (label) = 1;
32820
32821 emit_move_insn (operand0, res);
32822 }
32823 \f
32824
32825 /* Table of valid machine attributes. */
32826 static const struct attribute_spec ix86_attribute_table[] =
32827 {
32828 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
32829 affects_type_identity } */
32830 /* Stdcall attribute says callee is responsible for popping arguments
32831 if they are not variable. */
32832 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32833 true },
32834 /* Fastcall attribute says callee is responsible for popping arguments
32835 if they are not variable. */
32836 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32837 true },
32838 /* Thiscall attribute says callee is responsible for popping arguments
32839 if they are not variable. */
32840 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32841 true },
32842 /* Cdecl attribute says the callee is a normal C declaration */
32843 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32844 true },
32845 /* Regparm attribute specifies how many integer arguments are to be
32846 passed in registers. */
32847 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
32848 true },
32849 /* Sseregparm attribute says we are using x86_64 calling conventions
32850 for FP arguments. */
32851 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32852 true },
32853 /* force_align_arg_pointer says this function realigns the stack at entry. */
32854 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32855 false, true, true, ix86_handle_cconv_attribute, false },
32856 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32857 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
32858 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
32859 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
32860 false },
32861 #endif
32862 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32863 false },
32864 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32865 false },
32866 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32867 SUBTARGET_ATTRIBUTE_TABLE,
32868 #endif
32869 /* ms_abi and sysv_abi calling convention function attributes. */
32870 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32871 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32872 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
32873 false },
32874 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32875 ix86_handle_callee_pop_aggregate_return, true },
32876 /* End element. */
32877 { NULL, 0, 0, false, false, false, NULL, false }
32878 };
32879
32880 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32881 static int
32882 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32883 tree vectype ATTRIBUTE_UNUSED,
32884 int misalign ATTRIBUTE_UNUSED)
32885 {
32886 switch (type_of_cost)
32887 {
32888 case scalar_stmt:
32889 return ix86_cost->scalar_stmt_cost;
32890
32891 case scalar_load:
32892 return ix86_cost->scalar_load_cost;
32893
32894 case scalar_store:
32895 return ix86_cost->scalar_store_cost;
32896
32897 case vector_stmt:
32898 return ix86_cost->vec_stmt_cost;
32899
32900 case vector_load:
32901 return ix86_cost->vec_align_load_cost;
32902
32903 case vector_store:
32904 return ix86_cost->vec_store_cost;
32905
32906 case vec_to_scalar:
32907 return ix86_cost->vec_to_scalar_cost;
32908
32909 case scalar_to_vec:
32910 return ix86_cost->scalar_to_vec_cost;
32911
32912 case unaligned_load:
32913 case unaligned_store:
32914 return ix86_cost->vec_unalign_load_cost;
32915
32916 case cond_branch_taken:
32917 return ix86_cost->cond_taken_branch_cost;
32918
32919 case cond_branch_not_taken:
32920 return ix86_cost->cond_not_taken_branch_cost;
32921
32922 case vec_perm:
32923 return 1;
32924
32925 default:
32926 gcc_unreachable ();
32927 }
32928 }
32929
32930
32931 /* Implement targetm.vectorize.builtin_vec_perm. */
32932
32933 static tree
32934 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32935 {
32936 tree itype = TREE_TYPE (vec_type);
32937 bool u = TYPE_UNSIGNED (itype);
32938 enum machine_mode vmode = TYPE_MODE (vec_type);
32939 enum ix86_builtins fcode;
32940 bool ok = TARGET_SSE2;
32941
32942 switch (vmode)
32943 {
32944 case V4DFmode:
32945 ok = TARGET_AVX;
32946 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32947 goto get_di;
32948 case V2DFmode:
32949 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32950 get_di:
32951 itype = ix86_get_builtin_type (IX86_BT_DI);
32952 break;
32953
32954 case V8SFmode:
32955 ok = TARGET_AVX;
32956 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32957 goto get_si;
32958 case V4SFmode:
32959 ok = TARGET_SSE;
32960 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32961 get_si:
32962 itype = ix86_get_builtin_type (IX86_BT_SI);
32963 break;
32964
32965 case V2DImode:
32966 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32967 break;
32968 case V4SImode:
32969 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32970 break;
32971 case V8HImode:
32972 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32973 break;
32974 case V16QImode:
32975 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32976 break;
32977 default:
32978 ok = false;
32979 break;
32980 }
32981
32982 if (!ok)
32983 return NULL_TREE;
32984
32985 *mask_type = itype;
32986 return ix86_builtins[(int) fcode];
32987 }
32988
32989 /* Return a vector mode with twice as many elements as VMODE. */
32990 /* ??? Consider moving this to a table generated by genmodes.c. */
32991
32992 static enum machine_mode
32993 doublesize_vector_mode (enum machine_mode vmode)
32994 {
32995 switch (vmode)
32996 {
32997 case V2SFmode: return V4SFmode;
32998 case V1DImode: return V2DImode;
32999 case V2SImode: return V4SImode;
33000 case V4HImode: return V8HImode;
33001 case V8QImode: return V16QImode;
33002
33003 case V2DFmode: return V4DFmode;
33004 case V4SFmode: return V8SFmode;
33005 case V2DImode: return V4DImode;
33006 case V4SImode: return V8SImode;
33007 case V8HImode: return V16HImode;
33008 case V16QImode: return V32QImode;
33009
33010 case V4DFmode: return V8DFmode;
33011 case V8SFmode: return V16SFmode;
33012 case V4DImode: return V8DImode;
33013 case V8SImode: return V16SImode;
33014 case V16HImode: return V32HImode;
33015 case V32QImode: return V64QImode;
33016
33017 default:
33018 gcc_unreachable ();
33019 }
33020 }
33021
33022 /* Construct (set target (vec_select op0 (parallel perm))) and
33023 return true if that's a valid instruction in the active ISA. */
33024
33025 static bool
33026 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
33027 {
33028 rtx rperm[MAX_VECT_LEN], x;
33029 unsigned i;
33030
33031 for (i = 0; i < nelt; ++i)
33032 rperm[i] = GEN_INT (perm[i]);
33033
33034 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
33035 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
33036 x = gen_rtx_SET (VOIDmode, target, x);
33037
33038 x = emit_insn (x);
33039 if (recog_memoized (x) < 0)
33040 {
33041 remove_insn (x);
33042 return false;
33043 }
33044 return true;
33045 }
33046
33047 /* Similar, but generate a vec_concat from op0 and op1 as well. */
33048
33049 static bool
33050 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
33051 const unsigned char *perm, unsigned nelt)
33052 {
33053 enum machine_mode v2mode;
33054 rtx x;
33055
33056 v2mode = doublesize_vector_mode (GET_MODE (op0));
33057 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
33058 return expand_vselect (target, x, perm, nelt);
33059 }
33060
33061 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33062 in terms of blendp[sd] / pblendw / pblendvb. */
33063
33064 static bool
33065 expand_vec_perm_blend (struct expand_vec_perm_d *d)
33066 {
33067 enum machine_mode vmode = d->vmode;
33068 unsigned i, mask, nelt = d->nelt;
33069 rtx target, op0, op1, x;
33070
33071 if (!TARGET_SSE4_1 || d->op0 == d->op1)
33072 return false;
33073 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
33074 return false;
33075
33076 /* This is a blend, not a permute. Elements must stay in their
33077 respective lanes. */
33078 for (i = 0; i < nelt; ++i)
33079 {
33080 unsigned e = d->perm[i];
33081 if (!(e == i || e == i + nelt))
33082 return false;
33083 }
33084
33085 if (d->testing_p)
33086 return true;
33087
33088 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
33089 decision should be extracted elsewhere, so that we only try that
33090 sequence once all budget==3 options have been tried. */
33091
33092 /* For bytes, see if bytes move in pairs so we can use pblendw with
33093 an immediate argument, rather than pblendvb with a vector argument. */
33094 if (vmode == V16QImode)
33095 {
33096 bool pblendw_ok = true;
33097 for (i = 0; i < 16 && pblendw_ok; i += 2)
33098 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
33099
33100 if (!pblendw_ok)
33101 {
33102 rtx rperm[16], vperm;
33103
33104 for (i = 0; i < nelt; ++i)
33105 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
33106
33107 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33108 vperm = force_reg (V16QImode, vperm);
33109
33110 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
33111 return true;
33112 }
33113 }
33114
33115 target = d->target;
33116 op0 = d->op0;
33117 op1 = d->op1;
33118 mask = 0;
33119
33120 switch (vmode)
33121 {
33122 case V4DFmode:
33123 case V8SFmode:
33124 case V2DFmode:
33125 case V4SFmode:
33126 case V8HImode:
33127 for (i = 0; i < nelt; ++i)
33128 mask |= (d->perm[i] >= nelt) << i;
33129 break;
33130
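/* The integer element cases below are blended in V8HImode (pblendw), so
   replicate each element's selector bit across its 16-bit sub-words:
   four bits per DImode element, two per SImode element, and for V16QImode
   one bit per pair of bytes.  */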
33131 case V2DImode:
33132 for (i = 0; i < 2; ++i)
33133 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
33134 goto do_subreg;
33135
33136 case V4SImode:
33137 for (i = 0; i < 4; ++i)
33138 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
33139 goto do_subreg;
33140
33141 case V16QImode:
33142 for (i = 0; i < 8; ++i)
33143 mask |= (d->perm[i * 2] >= 16) << i;
33144
33145 do_subreg:
33146 vmode = V8HImode;
33147 target = gen_lowpart (vmode, target);
33148 op0 = gen_lowpart (vmode, op0);
33149 op1 = gen_lowpart (vmode, op1);
33150 break;
33151
33152 default:
33153 gcc_unreachable ();
33154 }
33155
33156 /* This matches five different patterns with the different modes. */
33157 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
33158 x = gen_rtx_SET (VOIDmode, target, x);
33159 emit_insn (x);
33160
33161 return true;
33162 }
33163
33164 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33165 in terms of the variable form of vpermilps.
33166
33167 Note that the immediate form of vpermilps will already have been tried
33168 and failed; it requires that the high and low lane shuffles be identical,
33169 which the variable form does not.  */
33170
33171 static bool
33172 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
33173 {
33174 rtx rperm[8], vperm;
33175 unsigned i;
33176
33177 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
33178 return false;
33179
33180 /* We can only permute within the 128-bit lane. */
33181 for (i = 0; i < 8; ++i)
33182 {
33183 unsigned e = d->perm[i];
33184 if (i < 4 ? e >= 4 : e < 4)
33185 return false;
33186 }
33187
33188 if (d->testing_p)
33189 return true;
33190
33191 for (i = 0; i < 8; ++i)
33192 {
33193 unsigned e = d->perm[i];
33194
33195 /* Within each 128-bit lane, the elements of op0 are numbered
33196 from 0 and the elements of op1 are numbered from 4. */
33197 if (e >= 8 + 4)
33198 e -= 8;
33199 else if (e >= 4)
33200 e -= 4;
33201
33202 rperm[i] = GEN_INT (e);
33203 }
33204
33205 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
33206 vperm = force_reg (V8SImode, vperm);
33207 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
33208
33209 return true;
33210 }
33211
33212 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33213 in terms of pshufb or vpperm. */
33214
33215 static bool
33216 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
33217 {
33218 unsigned i, nelt, eltsz;
33219 rtx rperm[16], vperm, target, op0, op1;
33220
33221 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
33222 return false;
33223 if (GET_MODE_SIZE (d->vmode) != 16)
33224 return false;
33225
33226 if (d->testing_p)
33227 return true;
33228
33229 nelt = d->nelt;
33230 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33231
33232 for (i = 0; i < nelt; ++i)
33233 {
33234 unsigned j, e = d->perm[i];
33235 for (j = 0; j < eltsz; ++j)
33236 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
33237 }
33238
33239 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33240 vperm = force_reg (V16QImode, vperm);
33241
33242 target = gen_lowpart (V16QImode, d->target);
33243 op0 = gen_lowpart (V16QImode, d->op0);
33244 if (d->op0 == d->op1)
33245 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
33246 else
33247 {
33248 op1 = gen_lowpart (V16QImode, d->op1);
33249 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
33250 }
33251
33252 return true;
33253 }
33254
33255 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
33256 in a single instruction. */
33257
33258 static bool
33259 expand_vec_perm_1 (struct expand_vec_perm_d *d)
33260 {
33261 unsigned i, nelt = d->nelt;
33262 unsigned char perm2[MAX_VECT_LEN];
33263
33264 /* Check plain VEC_SELECT first, because AVX has instructions that could
33265 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
33266 input where SEL+CONCAT may not. */
33267 if (d->op0 == d->op1)
33268 {
33269 int mask = nelt - 1;
33270
33271 for (i = 0; i < nelt; i++)
33272 perm2[i] = d->perm[i] & mask;
33273
33274 if (expand_vselect (d->target, d->op0, perm2, nelt))
33275 return true;
33276
33277 /* There are plenty of patterns in sse.md that are written for
33278 SEL+CONCAT and are not replicated for a single op. Perhaps
33279 that should be changed, to avoid the nastiness here. */
33280
33281 /* Recognize interleave style patterns, which means incrementing
33282 every other permutation operand. */
33283 for (i = 0; i < nelt; i += 2)
33284 {
33285 perm2[i] = d->perm[i] & mask;
33286 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
33287 }
33288 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33289 return true;
33290
33291 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
33292 if (nelt >= 4)
33293 {
33294 for (i = 0; i < nelt; i += 4)
33295 {
33296 perm2[i + 0] = d->perm[i + 0] & mask;
33297 perm2[i + 1] = d->perm[i + 1] & mask;
33298 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
33299 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
33300 }
33301
33302 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33303 return true;
33304 }
33305 }
33306
33307 /* Finally, try the fully general two operand permute. */
33308 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
33309 return true;
33310
33311 /* Recognize interleave style patterns with reversed operands. */
33312 if (d->op0 != d->op1)
33313 {
33314 for (i = 0; i < nelt; ++i)
33315 {
33316 unsigned e = d->perm[i];
33317 if (e >= nelt)
33318 e -= nelt;
33319 else
33320 e += nelt;
33321 perm2[i] = e;
33322 }
33323
33324 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
33325 return true;
33326 }
33327
33328 /* Try the SSE4.1 blend variable merge instructions. */
33329 if (expand_vec_perm_blend (d))
33330 return true;
33331
33332 /* Try one of the AVX vpermil variable permutations. */
33333 if (expand_vec_perm_vpermil (d))
33334 return true;
33335
33336 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
33337 if (expand_vec_perm_pshufb (d))
33338 return true;
33339
33340 return false;
33341 }
33342
33343 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33344 in terms of a pair of pshuflw + pshufhw instructions. */
33345
33346 static bool
33347 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
33348 {
33349 unsigned char perm2[MAX_VECT_LEN];
33350 unsigned i;
33351 bool ok;
33352
33353 if (d->vmode != V8HImode || d->op0 != d->op1)
33354 return false;
33355
33356 /* The two permutations only operate in 64-bit lanes. */
33357 for (i = 0; i < 4; ++i)
33358 if (d->perm[i] >= 4)
33359 return false;
33360 for (i = 4; i < 8; ++i)
33361 if (d->perm[i] < 4)
33362 return false;
33363
33364 if (d->testing_p)
33365 return true;
33366
33367 /* Emit the pshuflw. */
33368 memcpy (perm2, d->perm, 4);
33369 for (i = 4; i < 8; ++i)
33370 perm2[i] = i;
33371 ok = expand_vselect (d->target, d->op0, perm2, 8);
33372 gcc_assert (ok);
33373
33374 /* Emit the pshufhw. */
33375 memcpy (perm2 + 4, d->perm + 4, 4);
33376 for (i = 0; i < 4; ++i)
33377 perm2[i] = i;
33378 ok = expand_vselect (d->target, d->target, perm2, 8);
33379 gcc_assert (ok);
33380
33381 return true;
33382 }
33383
33384 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33385 the permutation using the SSSE3 palignr instruction. This succeeds
33386 when all of the elements in PERM fit within one vector and we merely
33387 need to shift them down so that a single vector permutation has a
33388 chance to succeed. */
33389
33390 static bool
33391 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33392 {
33393 unsigned i, nelt = d->nelt;
33394 unsigned min, max;
33395 bool in_order, ok;
33396 rtx shift;
33397
33398 /* Even with AVX, palignr only operates on 128-bit vectors. */
33399 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33400 return false;
33401
33402 min = nelt, max = 0;
33403 for (i = 0; i < nelt; ++i)
33404 {
33405 unsigned e = d->perm[i];
33406 if (e < min)
33407 min = e;
33408 if (e > max)
33409 max = e;
33410 }
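/* A zero shift would change nothing, and if the used elements span a full
   vector or more they cannot be brought into a single vector by shifting.  */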
33411 if (min == 0 || max - min >= nelt)
33412 return false;
33413
33414 /* Given that we have SSSE3, we know we'll be able to implement the
33415 single operand permutation after the palignr with pshufb. */
33416 if (d->testing_p)
33417 return true;
33418
33419 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33420 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33421 gen_lowpart (TImode, d->op1),
33422 gen_lowpart (TImode, d->op0), shift));
33423
33424 d->op0 = d->op1 = d->target;
33425
33426 in_order = true;
33427 for (i = 0; i < nelt; ++i)
33428 {
33429 unsigned e = d->perm[i] - min;
33430 if (e != i)
33431 in_order = false;
33432 d->perm[i] = e;
33433 }
33434
33435 /* Test for the degenerate case where the alignment by itself
33436 produces the desired permutation. */
33437 if (in_order)
33438 return true;
33439
33440 ok = expand_vec_perm_1 (d);
33441 gcc_assert (ok);
33442
33443 return ok;
33444 }
33445
33446 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33447 a two vector permutation into a single vector permutation by using
33448 an interleave operation to merge the vectors. */
33449
33450 static bool
33451 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33452 {
33453 struct expand_vec_perm_d dremap, dfinal;
33454 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33455 unsigned contents, h1, h2, h3, h4;
33456 unsigned char remap[2 * MAX_VECT_LEN];
33457 rtx seq;
33458 bool ok;
33459
33460 if (d->op0 == d->op1)
33461 return false;
33462
33463 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33464 lanes. We can use similar techniques with the vperm2f128 instruction,
33465 but it requires slightly different logic. */
33466 if (GET_MODE_SIZE (d->vmode) != 16)
33467 return false;
33468
33469 /* Examine from whence the elements come. */
33470 contents = 0;
33471 for (i = 0; i < nelt; ++i)
33472 contents |= 1u << d->perm[i];
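/* CONTENTS now has bit E set for every element index E used by the
   permutation: bits 0..nelt-1 refer to op0, bits nelt..2*nelt-1 to op1.  */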
33473
33474 /* Split the two input vectors into 4 halves. */
33475 h1 = (1u << nelt2) - 1;
33476 h2 = h1 << nelt2;
33477 h3 = h2 << nelt2;
33478 h4 = h3 << nelt2;
33479
33480 memset (remap, 0xff, sizeof (remap));
33481 dremap = *d;
33482
33483 /* If all elements come from the low halves, use interleave low; likewise,
33484 use interleave high when they all come from the high halves.  For elements
33485 from mismatched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
33486 if ((contents & (h1 | h3)) == contents)
33487 {
33488 for (i = 0; i < nelt2; ++i)
33489 {
33490 remap[i] = i * 2;
33491 remap[i + nelt] = i * 2 + 1;
33492 dremap.perm[i * 2] = i;
33493 dremap.perm[i * 2 + 1] = i + nelt;
33494 }
33495 }
33496 else if ((contents & (h2 | h4)) == contents)
33497 {
33498 for (i = 0; i < nelt2; ++i)
33499 {
33500 remap[i + nelt2] = i * 2;
33501 remap[i + nelt + nelt2] = i * 2 + 1;
33502 dremap.perm[i * 2] = i + nelt2;
33503 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33504 }
33505 }
33506 else if ((contents & (h1 | h4)) == contents)
33507 {
33508 for (i = 0; i < nelt2; ++i)
33509 {
33510 remap[i] = i;
33511 remap[i + nelt + nelt2] = i + nelt2;
33512 dremap.perm[i] = i;
33513 dremap.perm[i + nelt2] = i + nelt + nelt2;
33514 }
33515 if (nelt != 4)
33516 {
33517 dremap.vmode = V2DImode;
33518 dremap.nelt = 2;
33519 dremap.perm[0] = 0;
33520 dremap.perm[1] = 3;
33521 }
33522 }
33523 else if ((contents & (h2 | h3)) == contents)
33524 {
33525 for (i = 0; i < nelt2; ++i)
33526 {
33527 remap[i + nelt2] = i;
33528 remap[i + nelt] = i + nelt2;
33529 dremap.perm[i] = i + nelt2;
33530 dremap.perm[i + nelt2] = i + nelt;
33531 }
33532 if (nelt != 4)
33533 {
33534 dremap.vmode = V2DImode;
33535 dremap.nelt = 2;
33536 dremap.perm[0] = 1;
33537 dremap.perm[1] = 2;
33538 }
33539 }
33540 else
33541 return false;
33542
33543 /* Use the remapping array set up above to move the elements from their
33544 swizzled locations into their final destinations. */
33545 dfinal = *d;
33546 for (i = 0; i < nelt; ++i)
33547 {
33548 unsigned e = remap[d->perm[i]];
33549 gcc_assert (e < nelt);
33550 dfinal.perm[i] = e;
33551 }
33552 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33553 dfinal.op1 = dfinal.op0;
33554 dremap.target = dfinal.op0;
33555
33556 /* Test if the final remap can be done with a single insn. For V4SFmode or
33557 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33558 start_sequence ();
33559 ok = expand_vec_perm_1 (&dfinal);
33560 seq = get_insns ();
33561 end_sequence ();
33562
33563 if (!ok)
33564 return false;
33565
33566 if (dremap.vmode != dfinal.vmode)
33567 {
33568 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33569 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33570 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33571 }
33572
33573 ok = expand_vec_perm_1 (&dremap);
33574 gcc_assert (ok);
33575
33576 emit_insn (seq);
33577 return true;
33578 }
33579
33580 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33581 permutation with two pshufb insns and an ior. We should have already
33582 failed all two instruction sequences. */
33583
33584 static bool
33585 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33586 {
33587 rtx rperm[2][16], vperm, l, h, op, m128;
33588 unsigned int i, nelt, eltsz;
33589
33590 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33591 return false;
33592 gcc_assert (d->op0 != d->op1);
33593
33594 nelt = d->nelt;
33595 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33596
33597 /* Generate two permutation masks. If the required element is within
33598 the given vector it is shuffled into the proper lane. If the required
33599 element is in the other vector, force a zero into the lane by setting
33600 bit 7 in the permutation mask. */
33601 m128 = GEN_INT (-128);
33602 for (i = 0; i < nelt; ++i)
33603 {
33604 unsigned j, e = d->perm[i];
33605 unsigned which = (e >= nelt);
33606 if (e >= nelt)
33607 e -= nelt;
33608
33609 for (j = 0; j < eltsz; ++j)
33610 {
33611 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33612 rperm[1-which][i*eltsz + j] = m128;
33613 }
33614 }
33615
33616 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33617 vperm = force_reg (V16QImode, vperm);
33618
33619 l = gen_reg_rtx (V16QImode);
33620 op = gen_lowpart (V16QImode, d->op0);
33621 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33622
33623 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33624 vperm = force_reg (V16QImode, vperm);
33625
33626 h = gen_reg_rtx (V16QImode);
33627 op = gen_lowpart (V16QImode, d->op1);
33628 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33629
33630 op = gen_lowpart (V16QImode, d->target);
33631 emit_insn (gen_iorv16qi3 (op, l, h));
33632
33633 return true;
33634 }
33635
33636 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33637 and extract-odd permutations. */
33638
33639 static bool
33640 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33641 {
33642 rtx t1, t2, t3;
33643
33644 switch (d->vmode)
33645 {
33646 case V4DFmode:
33647 t1 = gen_reg_rtx (V4DFmode);
33648 t2 = gen_reg_rtx (V4DFmode);
33649
33650 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33651 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33652 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33653
33654 /* Now an unpck[lh]pd will produce the result required. */
33655 if (odd)
33656 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33657 else
33658 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33659 emit_insn (t3);
33660 break;
33661
33662 case V8SFmode:
33663 {
33664 int mask = odd ? 0xdd : 0x88;
33665
33666 t1 = gen_reg_rtx (V8SFmode);
33667 t2 = gen_reg_rtx (V8SFmode);
33668 t3 = gen_reg_rtx (V8SFmode);
33669
33670 /* Shuffle within the 128-bit lanes to produce:
33671 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33672 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33673 GEN_INT (mask)));
33674
33675 /* Shuffle the lanes around to produce:
33676 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33677 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33678 GEN_INT (0x3)));
33679
33680 /* Shuffle within the 128-bit lanes to produce:
33681 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33682 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33683
33684 /* Shuffle within the 128-bit lanes to produce:
33685 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33686 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33687
33688 /* Shuffle the lanes around to produce:
33689 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33690 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33691 GEN_INT (0x20)));
33692 }
33693 break;
33694
33695 case V2DFmode:
33696 case V4SFmode:
33697 case V2DImode:
33698 case V4SImode:
33699 /* These are always directly implementable by expand_vec_perm_1. */
33700 gcc_unreachable ();
33701
33702 case V8HImode:
33703 if (TARGET_SSSE3)
33704 return expand_vec_perm_pshufb2 (d);
33705 else
33706 {
33707 /* We need 2*log2(N)-1 operations to achieve odd/even
33708 with interleave. */
33709 t1 = gen_reg_rtx (V8HImode);
33710 t2 = gen_reg_rtx (V8HImode);
33711 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33712 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33713 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33714 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33715 if (odd)
33716 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33717 else
33718 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33719 emit_insn (t3);
33720 }
33721 break;
33722
33723 case V16QImode:
33724 if (TARGET_SSSE3)
33725 return expand_vec_perm_pshufb2 (d);
33726 else
33727 {
33728 t1 = gen_reg_rtx (V16QImode);
33729 t2 = gen_reg_rtx (V16QImode);
33730 t3 = gen_reg_rtx (V16QImode);
33731 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33732 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33733 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33734 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33735 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33736 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33737 if (odd)
33738 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33739 else
33740 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33741 emit_insn (t3);
33742 }
33743 break;
33744
33745 default:
33746 gcc_unreachable ();
33747 }
33748
33749 return true;
33750 }
33751
33752 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33753 extract-even and extract-odd permutations. */
33754
33755 static bool
33756 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33757 {
33758 unsigned i, odd, nelt = d->nelt;
33759
33760 odd = d->perm[0];
33761 if (odd != 0 && odd != 1)
33762 return false;
33763
33764 for (i = 1; i < nelt; ++i)
33765 if (d->perm[i] != 2 * i + odd)
33766 return false;
33767
33768 return expand_vec_perm_even_odd_1 (d, odd);
33769 }
33770
33771 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33772 permutations. We assume that expand_vec_perm_1 has already failed. */
33773
33774 static bool
33775 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33776 {
33777 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33778 enum machine_mode vmode = d->vmode;
33779 unsigned char perm2[4];
33780 rtx op0 = d->op0;
33781 bool ok;
33782
33783 switch (vmode)
33784 {
33785 case V4DFmode:
33786 case V8SFmode:
33787 /* These are special-cased in sse.md so that we can optionally
33788 use the vbroadcast instruction. They expand to two insns
33789 if the input happens to be in a register. */
33790 gcc_unreachable ();
33791
33792 case V2DFmode:
33793 case V2DImode:
33794 case V4SFmode:
33795 case V4SImode:
33796 /* These are always implementable using standard shuffle patterns. */
33797 gcc_unreachable ();
33798
33799 case V8HImode:
33800 case V16QImode:
33801 /* These can be implemented via interleave. We save one insn by
33802 stopping once we have promoted to V4SImode and then use pshufd. */
33803 do
33804 {
33805 optab otab = vec_interleave_low_optab;
33806
33807 if (elt >= nelt2)
33808 {
33809 otab = vec_interleave_high_optab;
33810 elt -= nelt2;
33811 }
33812 nelt2 /= 2;
33813
33814 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33815 vmode = get_mode_wider_vector (vmode);
33816 op0 = gen_lowpart (vmode, op0);
33817 }
33818 while (vmode != V4SImode);
33819
33820 memset (perm2, elt, 4);
33821 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33822 gcc_assert (ok);
33823 return true;
33824
33825 default:
33826 gcc_unreachable ();
33827 }
33828 }
33829
33830 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33831 broadcast permutations. */
33832
33833 static bool
33834 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33835 {
33836 unsigned i, elt, nelt = d->nelt;
33837
33838 if (d->op0 != d->op1)
33839 return false;
33840
33841 elt = d->perm[0];
33842 for (i = 1; i < nelt; ++i)
33843 if (d->perm[i] != elt)
33844 return false;
33845
33846 return expand_vec_perm_broadcast_1 (d);
33847 }
33848
33849 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33850 With all of the interface bits taken care of, perform the expansion
33851 in D and return true on success. */
33852
33853 static bool
33854 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33855 {
33856 /* Try a single instruction expansion. */
33857 if (expand_vec_perm_1 (d))
33858 return true;
33859
33860 /* Try sequences of two instructions. */
33861
33862 if (expand_vec_perm_pshuflw_pshufhw (d))
33863 return true;
33864
33865 if (expand_vec_perm_palignr (d))
33866 return true;
33867
33868 if (expand_vec_perm_interleave2 (d))
33869 return true;
33870
33871 if (expand_vec_perm_broadcast (d))
33872 return true;
33873
33874 /* Try sequences of three instructions. */
33875
33876 if (expand_vec_perm_pshufb2 (d))
33877 return true;
33878
33879 /* ??? Look for narrow permutations whose element orderings would
33880 allow the promotion to a wider mode. */
33881
33882 /* ??? Look for sequences of interleave or a wider permute that place
33883 the data into the correct lanes for a half-vector shuffle like
33884 pshuf[lh]w or vpermilps. */
33885
33886 /* ??? Look for sequences of interleave that produce the desired results.
33887 The combinatorics of punpck[lh] get pretty ugly... */
33888
33889 if (expand_vec_perm_even_odd (d))
33890 return true;
33891
33892 return false;
33893 }
33894
33895 /* Extract the values from the vector CST into the permutation array in D.
33896 Return 0 on error, 1 if all values from the permutation come from the
33897 first vector, 2 if all values from the second vector, and 3 otherwise. */
33898
33899 static int
33900 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33901 {
33902 tree list = TREE_VECTOR_CST_ELTS (cst);
33903 unsigned i, nelt = d->nelt;
33904 int ret = 0;
33905
33906 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33907 {
33908 unsigned HOST_WIDE_INT e;
33909
33910 if (!host_integerp (TREE_VALUE (list), 1))
33911 return 0;
33912 e = tree_low_cst (TREE_VALUE (list), 1);
33913 if (e >= 2 * nelt)
33914 return 0;
33915
33916 ret |= (e < nelt ? 1 : 2);
33917 d->perm[i] = e;
33918 }
33919 gcc_assert (list == NULL);
33920
33921 /* For all elements from second vector, fold the elements to first. */
33922 if (ret == 2)
33923 for (i = 0; i < nelt; ++i)
33924 d->perm[i] -= nelt;
33925
33926 return ret;
33927 }
33928
33929 static rtx
33930 ix86_expand_vec_perm_builtin (tree exp)
33931 {
33932 struct expand_vec_perm_d d;
33933 tree arg0, arg1, arg2;
33934
33935 arg0 = CALL_EXPR_ARG (exp, 0);
33936 arg1 = CALL_EXPR_ARG (exp, 1);
33937 arg2 = CALL_EXPR_ARG (exp, 2);
33938
33939 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33940 d.nelt = GET_MODE_NUNITS (d.vmode);
33941 d.testing_p = false;
33942 gcc_assert (VECTOR_MODE_P (d.vmode));
33943
33944 if (TREE_CODE (arg2) != VECTOR_CST)
33945 {
33946 error_at (EXPR_LOCATION (exp),
33947 "vector permutation requires vector constant");
33948 goto exit_error;
33949 }
33950
33951 switch (extract_vec_perm_cst (&d, arg2))
33952 {
33953 default:
33954 gcc_unreachable();
33955
33956 case 0:
33957 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33958 goto exit_error;
33959
33960 case 3:
33961 if (!operand_equal_p (arg0, arg1, 0))
33962 {
33963 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33964 d.op0 = force_reg (d.vmode, d.op0);
33965 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33966 d.op1 = force_reg (d.vmode, d.op1);
33967 break;
33968 }
33969
33970 /* The elements of PERM reference both operands, but the two
33971 operands are in fact identical.  Allow easier matching of the
33972 permutation by folding the permutation onto the single
33973 input vector.  */
33974 {
33975 unsigned i, nelt = d.nelt;
33976 for (i = 0; i < nelt; ++i)
33977 if (d.perm[i] >= nelt)
33978 d.perm[i] -= nelt;
33979 }
33980 /* FALLTHRU */
33981
33982 case 1:
33983 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33984 d.op0 = force_reg (d.vmode, d.op0);
33985 d.op1 = d.op0;
33986 break;
33987
33988 case 2:
33989 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33990 d.op0 = force_reg (d.vmode, d.op0);
33991 d.op1 = d.op0;
33992 break;
33993 }
33994
33995 d.target = gen_reg_rtx (d.vmode);
33996 if (ix86_expand_vec_perm_builtin_1 (&d))
33997 return d.target;
33998
33999 /* For compiler generated permutations, we should never get here, because
34000 the compiler should also be checking the ok hook. But since this is a
34001 builtin the user has access to, don't abort. */
34002 switch (d.nelt)
34003 {
34004 case 2:
34005 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
34006 break;
34007 case 4:
34008 sorry ("vector permutation (%d %d %d %d)",
34009 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
34010 break;
34011 case 8:
34012 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
34013 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
34014 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
34015 break;
34016 case 16:
34017 sorry ("vector permutation "
34018 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
34019 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
34020 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
34021 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
34022 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
34023 break;
34024 default:
34025 gcc_unreachable ();
34026 }
34027 exit_error:
34028 return CONST0_RTX (d.vmode);
34029 }
34030
34031 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
34032
34033 static bool
34034 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
34035 {
34036 struct expand_vec_perm_d d;
34037 int vec_mask;
34038 bool ret, one_vec;
34039
34040 d.vmode = TYPE_MODE (vec_type);
34041 d.nelt = GET_MODE_NUNITS (d.vmode);
34042 d.testing_p = true;
34043
34044 /* Given sufficient ISA support we can just return true here
34045 for selected vector modes. */
34046 if (GET_MODE_SIZE (d.vmode) == 16)
34047 {
34048 /* All implementable with a single vpperm insn. */
34049 if (TARGET_XOP)
34050 return true;
34051 /* All implementable with 2 pshufb + 1 ior. */
34052 if (TARGET_SSSE3)
34053 return true;
34054 /* All implementable with shufpd or unpck[lh]pd. */
34055 if (d.nelt == 2)
34056 return true;
34057 }
34058
34059 vec_mask = extract_vec_perm_cst (&d, mask);
34060
34061 /* This hook cannot be called in response to something that the
34062 user does (unlike the builtin expander), so we shouldn't ever see
34063 an error generated from the extract. */
34064 gcc_assert (vec_mask > 0 && vec_mask <= 3);
34065 one_vec = (vec_mask != 3);
34066
34067 /* Implementable with shufps or pshufd. */
34068 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
34069 return true;
34070
34071 /* Otherwise we have to go through the motions and see if we can
34072 figure out how to generate the requested permutation. */
34073 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
34074 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
34075 if (!one_vec)
34076 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
34077
34078 start_sequence ();
34079 ret = ix86_expand_vec_perm_builtin_1 (&d);
34080 end_sequence ();
34081
34082 return ret;
34083 }
34084
34085 void
34086 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
34087 {
34088 struct expand_vec_perm_d d;
34089 unsigned i, nelt;
34090
34091 d.target = targ;
34092 d.op0 = op0;
34093 d.op1 = op1;
34094 d.vmode = GET_MODE (targ);
34095 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
34096 d.testing_p = false;
34097
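/* Select the even (odd == 0) or odd (odd == 1) elements from the
   concatenation of OP0 and OP1.  */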
34098 for (i = 0; i < nelt; ++i)
34099 d.perm[i] = i * 2 + odd;
34100
34101 /* We'll either be able to implement the permutation directly... */
34102 if (expand_vec_perm_1 (&d))
34103 return;
34104
34105 /* ... or we use the special-case patterns. */
34106 expand_vec_perm_even_odd_1 (&d, odd);
34107 }
34108
34109 /* Expand an insert into a vector register through pinsr insn.
34110 Return true if successful. */
34111
34112 bool
34113 ix86_expand_pinsr (rtx *operands)
34114 {
34115 rtx dst = operands[0];
34116 rtx src = operands[3];
34117
34118 unsigned int size = INTVAL (operands[1]);
34119 unsigned int pos = INTVAL (operands[2]);
34120
34121 if (GET_CODE (dst) == SUBREG)
34122 {
34123 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
34124 dst = SUBREG_REG (dst);
34125 }
34126
34127 if (GET_CODE (src) == SUBREG)
34128 src = SUBREG_REG (src);
34129
34130 switch (GET_MODE (dst))
34131 {
34132 case V16QImode:
34133 case V8HImode:
34134 case V4SImode:
34135 case V2DImode:
34136 {
34137 enum machine_mode srcmode, dstmode;
34138 rtx (*pinsr)(rtx, rtx, rtx, rtx);
34139
34140 srcmode = mode_for_size (size, MODE_INT, 0);
34141
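/* Pick the integer mode matching the field width; only QImode,
   HImode, SImode and DImode insertions are handled below.  */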
34142 switch (srcmode)
34143 {
34144 case QImode:
34145 if (!TARGET_SSE4_1)
34146 return false;
34147 dstmode = V16QImode;
34148 pinsr = gen_sse4_1_pinsrb;
34149 break;
34150
34151 case HImode:
34152 if (!TARGET_SSE2)
34153 return false;
34154 dstmode = V8HImode;
34155 pinsr = gen_sse2_pinsrw;
34156 break;
34157
34158 case SImode:
34159 if (!TARGET_SSE4_1)
34160 return false;
34161 dstmode = V4SImode;
34162 pinsr = gen_sse4_1_pinsrd;
34163 break;
34164
34165 case DImode:
34166 gcc_assert (TARGET_64BIT);
34167 if (!TARGET_SSE4_1)
34168 return false;
34169 dstmode = V2DImode;
34170 pinsr = gen_sse4_1_pinsrq;
34171 break;
34172
34173 default:
34174 return false;
34175 }
34176
34177 dst = gen_lowpart (dstmode, dst);
34178 src = gen_lowpart (srcmode, src);
34179
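/* POS is in bits; convert it to an element index.  The pinsr
   patterns expect the insertion point encoded as a single-bit
   mask, hence the 1 << pos immediate below.  */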
34180 pos /= size;
34181
34182 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
34183 return true;
34184 }
34185
34186 default:
34187 return false;
34188 }
34189 }
34190 \f
34191 /* This function returns the calling-ABI-specific va_list type node,
34192 i.e. the va_list type appropriate for FNDECL. */
34193
34194 static tree
34195 ix86_fn_abi_va_list (tree fndecl)
34196 {
34197 if (!TARGET_64BIT)
34198 return va_list_type_node;
34199 gcc_assert (fndecl != NULL_TREE);
34200
34201 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
34202 return ms_va_list_type_node;
34203 else
34204 return sysv_va_list_type_node;
34205 }
34206
34207 /* Returns the canonical va_list type specified by TYPE. If there
34208 is no valid TYPE provided, it returns NULL_TREE. */
34209
34210 static tree
34211 ix86_canonical_va_list_type (tree type)
34212 {
34213 tree wtype, htype;
34214
34215 /* Resolve references and pointers to va_list type. */
34216 if (TREE_CODE (type) == MEM_REF)
34217 type = TREE_TYPE (type);
34218 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
34219 type = TREE_TYPE (type);
34220 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
34221 type = TREE_TYPE (type);
34222
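/* In 64-bit mode the argument may match any of the three va_list
   flavors (generic, SysV or MS); compare against each in turn.  */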
34223 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
34224 {
34225 wtype = va_list_type_node;
34226 gcc_assert (wtype != NULL_TREE);
34227 htype = type;
34228 if (TREE_CODE (wtype) == ARRAY_TYPE)
34229 {
34230 /* If va_list is an array type, the argument may have decayed
34231 to a pointer type, e.g. by being passed to another function.
34232 In that case, unwrap both types so that we can compare the
34233 underlying records. */
34234 if (TREE_CODE (htype) == ARRAY_TYPE
34235 || POINTER_TYPE_P (htype))
34236 {
34237 wtype = TREE_TYPE (wtype);
34238 htype = TREE_TYPE (htype);
34239 }
34240 }
34241 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34242 return va_list_type_node;
34243 wtype = sysv_va_list_type_node;
34244 gcc_assert (wtype != NULL_TREE);
34245 htype = type;
34246 if (TREE_CODE (wtype) == ARRAY_TYPE)
34247 {
34248 /* If va_list is an array type, the argument may have decayed
34249 to a pointer type, e.g. by being passed to another function.
34250 In that case, unwrap both types so that we can compare the
34251 underlying records. */
34252 if (TREE_CODE (htype) == ARRAY_TYPE
34253 || POINTER_TYPE_P (htype))
34254 {
34255 wtype = TREE_TYPE (wtype);
34256 htype = TREE_TYPE (htype);
34257 }
34258 }
34259 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34260 return sysv_va_list_type_node;
34261 wtype = ms_va_list_type_node;
34262 gcc_assert (wtype != NULL_TREE);
34263 htype = type;
34264 if (TREE_CODE (wtype) == ARRAY_TYPE)
34265 {
34266 /* If va_list is an array type, the argument may have decayed
34267 to a pointer type, e.g. by being passed to another function.
34268 In that case, unwrap both types so that we can compare the
34269 underlying records. */
34270 if (TREE_CODE (htype) == ARRAY_TYPE
34271 || POINTER_TYPE_P (htype))
34272 {
34273 wtype = TREE_TYPE (wtype);
34274 htype = TREE_TYPE (htype);
34275 }
34276 }
34277 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34278 return ms_va_list_type_node;
34279 return NULL_TREE;
34280 }
34281 return std_canonical_va_list_type (type);
34282 }
34283
34284 /* Iterate through the target-specific builtin types for va_list.
34285 IDX denotes the iterator, *PTREE is set to the result type of
34286 the va_list builtin, and *PNAME to its internal type.
34287 Returns zero if there is no element for this index, otherwise
34288 IDX should be increased upon the next call.
34289 Note, do not iterate a base builtin's name like __builtin_va_list.
34290 Used from c_common_nodes_and_builtins. */
34291
34292 static int
34293 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
34294 {
34295 if (TARGET_64BIT)
34296 {
34297 switch (idx)
34298 {
34299 default:
34300 break;
34301
34302 case 0:
34303 *ptree = ms_va_list_type_node;
34304 *pname = "__builtin_ms_va_list";
34305 return 1;
34306
34307 case 1:
34308 *ptree = sysv_va_list_type_node;
34309 *pname = "__builtin_sysv_va_list";
34310 return 1;
34311 }
34312 }
34313
34314 return 0;
34315 }
34316
34317 #undef TARGET_SCHED_DISPATCH
34318 #define TARGET_SCHED_DISPATCH has_dispatch
34319 #undef TARGET_SCHED_DISPATCH_DO
34320 #define TARGET_SCHED_DISPATCH_DO do_dispatch
34321
34322 /* The size of the dispatch window is the total number of bytes of
34323 object code allowed in a window. */
34324 #define DISPATCH_WINDOW_SIZE 16
34325
34326 /* Number of dispatch windows considered for scheduling. */
34327 #define MAX_DISPATCH_WINDOWS 3
34328
34329 /* Maximum number of instructions in a window. */
34330 #define MAX_INSN 4
34331
34332 /* Maximum number of immediate operands in a window. */
34333 #define MAX_IMM 4
34334
34335 /* Maximum number of immediate bits allowed in a window. */
34336 #define MAX_IMM_SIZE 128
34337
34338 /* Maximum number of 32 bit immediates allowed in a window. */
34339 #define MAX_IMM_32 4
34340
34341 /* Maximum number of 64 bit immediates allowed in a window. */
34342 #define MAX_IMM_64 2
34343
34344 /* Maximum total of loads or prefetches allowed in a window. */
34345 #define MAX_LOAD 2
34346
34347 /* Maximum total of stores allowed in a window. */
34348 #define MAX_STORE 1
34349
34350 #undef BIG
34351 #define BIG 100
34352
34353
34354 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
34355 enum dispatch_group {
34356 disp_no_group = 0,
34357 disp_load,
34358 disp_store,
34359 disp_load_store,
34360 disp_prefetch,
34361 disp_imm,
34362 disp_imm_32,
34363 disp_imm_64,
34364 disp_branch,
34365 disp_cmp,
34366 disp_jcc,
34367 disp_last
34368 };
34369
34370 /* Number of allowable groups in a dispatch window. It is an array
34371 indexed by dispatch_group enum. 100 is used as a big number,
34372 because the number of these kinds of operations does not have any
34373 effect on the dispatch window, but we need them for other reasons in
34374 the table. */
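/* The entries below correspond, in order, to disp_no_group, disp_load,
   disp_store, disp_load_store, disp_prefetch, disp_imm, disp_imm_32,
   disp_imm_64, disp_branch, disp_cmp and disp_jcc.  */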
34375 static unsigned int num_allowable_groups[disp_last] = {
34376 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
34377 };
34378
34379 char group_name[disp_last + 1][16] = {
34380 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
34381 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
34382 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
34383 };
34384
34385 /* Instruction path. */
34386 enum insn_path {
34387 no_path = 0,
34388 path_single, /* Single micro op. */
34389 path_double, /* Double micro op. */
34390 path_multi, /* Instructions with more than 2 micro ops. */
34391 last_path
34392 };
34393
34394 /* sched_insn_info defines a window to the instructions scheduled in
34395 the basic block. It contains a pointer to the insn_info table and
34396 the instruction scheduled.
34397
34398 Windows are allocated for each basic block and are linked
34399 together. */
34400 typedef struct sched_insn_info_s {
34401 rtx insn;
34402 enum dispatch_group group;
34403 enum insn_path path;
34404 int byte_len;
34405 int imm_bytes;
34406 } sched_insn_info;
34407
34408 /* Linked list of dispatch windows. This is a doubly linked list of
34409 dispatch windows of a basic block. It contains information about
34410 the number of uops in the window and the total number of
34411 instructions and of bytes in the object code for this dispatch
34412 window. */
34413 typedef struct dispatch_windows_s {
34414 int num_insn; /* Number of insns in the window. */
34415 int num_uops; /* Number of uops in the window. */
34416 int window_size; /* Number of bytes in the window. */
34417 int window_num; /* Window number, either 0 or 1. */
34418 int num_imm; /* Number of immediates in the window. */
34419 int num_imm_32; /* Number of 32 bit immediates in the window. */
34420 int num_imm_64; /* Number of 64 bit immediates in the window. */
34421 int imm_size; /* Total size in bytes of immediates in the window. */
34422 int num_loads; /* Total memory loads in the window. */
34423 int num_stores; /* Total memory stores in the window. */
34424 int violation; /* Violation exists in window. */
34425 sched_insn_info *window; /* Pointer to the window. */
34426 struct dispatch_windows_s *next;
34427 struct dispatch_windows_s *prev;
34428 } dispatch_windows;
34429
34430 /* Immediate values used in an insn. */
34431 typedef struct imm_info_s
34432 {
34433 int imm;
34434 int imm32;
34435 int imm64;
34436 } imm_info;
34437
34438 static dispatch_windows *dispatch_window_list;
34439 static dispatch_windows *dispatch_window_list1;
34440
34441 /* Get the memory dispatch group of insn. */
34442
34443 static enum dispatch_group
34444 get_mem_group (rtx insn)
34445 {
34446 enum attr_memory memory;
34447
34448 if (INSN_CODE (insn) < 0)
34449 return disp_no_group;
34450 memory = get_attr_memory (insn);
34451 if (memory == MEMORY_STORE)
34452 return disp_store;
34453
34454 if (memory == MEMORY_LOAD)
34455 return disp_load;
34456
34457 if (memory == MEMORY_BOTH)
34458 return disp_load_store;
34459
34460 return disp_no_group;
34461 }
34462
34463 /* Return true if insn is a compare instruction. */
34464
34465 static bool
34466 is_cmp (rtx insn)
34467 {
34468 enum attr_type type;
34469
34470 type = get_attr_type (insn);
34471 return (type == TYPE_TEST
34472 || type == TYPE_ICMP
34473 || type == TYPE_FCMP
34474 || GET_CODE (PATTERN (insn)) == COMPARE);
34475 }
34476
34477 /* Return true if a dispatch violation was encountered. */
34478
34479 static bool
34480 dispatch_violation (void)
34481 {
34482 if (dispatch_window_list->next)
34483 return dispatch_window_list->next->violation;
34484 return dispatch_window_list->violation;
34485 }
34486
34487 /* Return true if insn is a branch instruction. */
34488
34489 static bool
34490 is_branch (rtx insn)
34491 {
34492 return (CALL_P (insn) || JUMP_P (insn));
34493 }
34494
34495 /* Return true if insn is a prefetch instruction. */
34496
34497 static bool
34498 is_prefetch (rtx insn)
34499 {
34500 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
34501 }
34502
34503 /* This function initializes a dispatch window and the list container holding a
34504 pointer to the window. */
34505
34506 static void
34507 init_window (int window_num)
34508 {
34509 int i;
34510 dispatch_windows *new_list;
34511
34512 if (window_num == 0)
34513 new_list = dispatch_window_list;
34514 else
34515 new_list = dispatch_window_list1;
34516
34517 new_list->num_insn = 0;
34518 new_list->num_uops = 0;
34519 new_list->window_size = 0;
34520 new_list->next = NULL;
34521 new_list->prev = NULL;
34522 new_list->window_num = window_num;
34523 new_list->num_imm = 0;
34524 new_list->num_imm_32 = 0;
34525 new_list->num_imm_64 = 0;
34526 new_list->imm_size = 0;
34527 new_list->num_loads = 0;
34528 new_list->num_stores = 0;
34529 new_list->violation = false;
34530
34531 for (i = 0; i < MAX_INSN; i++)
34532 {
34533 new_list->window[i].insn = NULL;
34534 new_list->window[i].group = disp_no_group;
34535 new_list->window[i].path = no_path;
34536 new_list->window[i].byte_len = 0;
34537 new_list->window[i].imm_bytes = 0;
34538 }
34539 return;
34540 }
34541
34542 /* This function allocates and initializes a dispatch window and the
34543 list container holding a pointer to the window. */
34544
34545 static dispatch_windows *
34546 allocate_window (void)
34547 {
34548 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34549 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34550
34551 return new_list;
34552 }
34553
34554 /* This routine initializes the dispatch scheduling information. It
34555 initiates building dispatch scheduler tables and constructs the
34556 first dispatch window. */
34557
34558 static void
34559 init_dispatch_sched (void)
34560 {
34561 /* Allocate a dispatch list and a window. */
34562 dispatch_window_list = allocate_window ();
34563 dispatch_window_list1 = allocate_window ();
34564 init_window (0);
34565 init_window (1);
34566 }
34567
34568 /* This function returns true if a branch is detected. End of a basic block
34569 does not have to be a branch, but here we assume only branches end a
34570 window. */
34571
34572 static bool
34573 is_end_basic_block (enum dispatch_group group)
34574 {
34575 return group == disp_branch;
34576 }
34577
34578 /* This function is called when the processing of a window reaches its end. */
34579
34580 static void
34581 process_end_window (void)
34582 {
34583 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34584 if (dispatch_window_list->next)
34585 {
34586 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34587 gcc_assert (dispatch_window_list->window_size
34588 + dispatch_window_list1->window_size <= 48);
34589 init_window (1);
34590 }
34591 init_window (0);
34592 }
34593
34594 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
34595 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
34596 for 48 bytes of instructions. Note that these windows are not dispatch
34597 windows whose sizes are DISPATCH_WINDOW_SIZE. */
34598
34599 static dispatch_windows *
34600 allocate_next_window (int window_num)
34601 {
34602 if (window_num == 0)
34603 {
34604 if (dispatch_window_list->next)
34605 init_window (1);
34606 init_window (0);
34607 return dispatch_window_list;
34608 }
34609
34610 dispatch_window_list->next = dispatch_window_list1;
34611 dispatch_window_list1->prev = dispatch_window_list;
34612
34613 return dispatch_window_list1;
34614 }
34615
34616 /* for_each_rtx callback used by find_constant: count an immediate operand found in *IN_RTX, updating IMM_VALUES. */
34617
34618 static int
34619 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34620 {
34621 if (*in_rtx == 0)
34622 return 0;
34623
34624 switch (GET_CODE (*in_rtx))
34625 {
34626 case CONST:
34627 case SYMBOL_REF:
34628 case CONST_INT:
34629 (imm_values->imm)++;
34630 if (x86_64_immediate_operand (*in_rtx, SImode))
34631 (imm_values->imm32)++;
34632 else
34633 (imm_values->imm64)++;
34634 break;
34635
34636 case CONST_DOUBLE:
34637 (imm_values->imm)++;
34638 (imm_values->imm64)++;
34639 break;
34640
34641 case CODE_LABEL:
34642 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34643 {
34644 (imm_values->imm)++;
34645 (imm_values->imm32)++;
34646 }
34647 break;
34648
34649 default:
34650 break;
34651 }
34652
34653 return 0;
34654 }
34655
34656 /* Compute number of immediate operands of an instruction. */
34657
34658 static void
34659 find_constant (rtx in_rtx, imm_info *imm_values)
34660 {
34661 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34662 (rtx_function) find_constant_1, (void *) imm_values);
34663 }
34664
34665 /* Return total size of immediate operands of an instruction along with number
34666 of corresponding immediate operands. It initializes its parameters to zero
34667 before calling FIND_CONSTANT.
34668 INSN is the input instruction. IMM is the total of immediates.
34669 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
34670 bit immediates. */
34671
34672 static int
34673 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
34674 {
34675 imm_info imm_values = {0, 0, 0};
34676
34677 find_constant (insn, &imm_values);
34678 *imm = imm_values.imm;
34679 *imm32 = imm_values.imm32;
34680 *imm64 = imm_values.imm64;
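/* The returned size counts 4 bytes for each 32 bit immediate and
   8 bytes for each 64 bit immediate.  */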
34681 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
34682 }
34683
34684 /* This function indicates whether an instruction has any immediate
34685 operand. */
34686
34687 static bool
34688 has_immediate (rtx insn)
34689 {
34690 int num_imm_operand;
34691 int num_imm32_operand;
34692 int num_imm64_operand;
34693
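/* get_num_immediates returns the total immediate size in bytes,
   which is nonzero whenever INSN has an immediate operand.  */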
34694 if (insn)
34695 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34696 &num_imm64_operand);
34697 return false;
34698 }
34699
34700 /* Return single, double or multi micro-op path for instruction INSN. */
34701
34702 static enum insn_path
34703 get_insn_path (rtx insn)
34704 {
34705 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
34706
34707 if ((int)path == 0)
34708 return path_single;
34709
34710 if ((int)path == 1)
34711 return path_double;
34712
34713 return path_multi;
34714 }
34715
34716 /* Return insn dispatch group. */
34717
34718 static enum dispatch_group
34719 get_insn_group (rtx insn)
34720 {
34721 enum dispatch_group group = get_mem_group (insn);
34722 if (group)
34723 return group;
34724
34725 if (is_branch (insn))
34726 return disp_branch;
34727
34728 if (is_cmp (insn))
34729 return disp_cmp;
34730
34731 if (has_immediate (insn))
34732 return disp_imm;
34733
34734 if (is_prefetch (insn))
34735 return disp_prefetch;
34736
34737 return disp_no_group;
34738 }
34739
34740 /* Count number of GROUP restricted instructions in a dispatch
34741 window WINDOW_LIST. */
34742
34743 static int
34744 count_num_restricted (rtx insn, dispatch_windows *window_list)
34745 {
34746 enum dispatch_group group = get_insn_group (insn);
34747 int imm_size;
34748 int num_imm_operand;
34749 int num_imm32_operand;
34750 int num_imm64_operand;
34751
34752 if (group == disp_no_group)
34753 return 0;
34754
34755 if (group == disp_imm)
34756 {
34757 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34758 &num_imm64_operand);
34759 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34760 || num_imm_operand + window_list->num_imm > MAX_IMM
34761 || (num_imm32_operand > 0
34762 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34763 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34764 || (num_imm64_operand > 0
34765 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34766 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34767 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34768 && num_imm64_operand > 0
34769 && ((window_list->num_imm_64 > 0
34770 && window_list->num_insn >= 2)
34771 || window_list->num_insn >= 3)))
34772 return BIG;
34773
34774 return 1;
34775 }
34776
34777 if ((group == disp_load_store
34778 && (window_list->num_loads >= MAX_LOAD
34779 || window_list->num_stores >= MAX_STORE))
34780 || ((group == disp_load
34781 || group == disp_prefetch)
34782 && window_list->num_loads >= MAX_LOAD)
34783 || (group == disp_store
34784 && window_list->num_stores >= MAX_STORE))
34785 return BIG;
34786
34787 return 1;
34788 }
34789
34790 /* This function returns true if insn satisfies dispatch rules on the
34791 last window scheduled. */
34792
34793 static bool
34794 fits_dispatch_window (rtx insn)
34795 {
34796 dispatch_windows *window_list = dispatch_window_list;
34797 dispatch_windows *window_list_next = dispatch_window_list->next;
34798 unsigned int num_restrict;
34799 enum dispatch_group group = get_insn_group (insn);
34800 enum insn_path path = get_insn_path (insn);
34801 int sum;
34802
34803 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
34804 instructions should be given the lowest priority in the
34805 scheduling process in the Haifa scheduler to make sure they will be
34806 scheduled in the same dispatch window as the reference to them. */
34807 if (group == disp_jcc || group == disp_cmp)
34808 return false;
34809
34810 /* Check nonrestricted. */
34811 if (group == disp_no_group || group == disp_branch)
34812 return true;
34813
34814 /* Get last dispatch window. */
34815 if (window_list_next)
34816 window_list = window_list_next;
34817
34818 if (window_list->window_num == 1)
34819 {
34820 sum = window_list->prev->window_size + window_list->window_size;
34821
34822 if (sum == 32
34823 || (min_insn_size (insn) + sum) >= 48)
34824 /* Window 1 is full. Go for next window. */
34825 return true;
34826 }
34827
34828 num_restrict = count_num_restricted (insn, window_list);
34829
34830 if (num_restrict > num_allowable_groups[group])
34831 return false;
34832
34833 /* See if it fits in the first window. */
34834 if (window_list->window_num == 0)
34835 {
34836 /* The first window should have only single- and double-path
34837 uops. */
34838 if (path == path_double
34839 && (window_list->num_uops + 2) > MAX_INSN)
34840 return false;
34841 else if (path != path_single)
34842 return false;
34843 }
34844 return true;
34845 }
34846
34847 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34848 dispatch window WINDOW_LIST. */
34849
34850 static void
34851 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
34852 {
34853 int byte_len = min_insn_size (insn);
34854 int num_insn = window_list->num_insn;
34855 int imm_size;
34856 sched_insn_info *window = window_list->window;
34857 enum dispatch_group group = get_insn_group (insn);
34858 enum insn_path path = get_insn_path (insn);
34859 int num_imm_operand;
34860 int num_imm32_operand;
34861 int num_imm64_operand;
34862
34863 if (!window_list->violation && group != disp_cmp
34864 && !fits_dispatch_window (insn))
34865 window_list->violation = true;
34866
34867 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34868 &num_imm64_operand);
34869
34870 /* Initialize window with new instruction. */
34871 window[num_insn].insn = insn;
34872 window[num_insn].byte_len = byte_len;
34873 window[num_insn].group = group;
34874 window[num_insn].path = path;
34875 window[num_insn].imm_bytes = imm_size;
34876
34877 window_list->window_size += byte_len;
34878 window_list->num_insn = num_insn + 1;
34879 window_list->num_uops = window_list->num_uops + num_uops;
34880 window_list->imm_size += imm_size;
34881 window_list->num_imm += num_imm_operand;
34882 window_list->num_imm_32 += num_imm32_operand;
34883 window_list->num_imm_64 += num_imm64_operand;
34884
34885 if (group == disp_store)
34886 window_list->num_stores += 1;
34887 else if (group == disp_load
34888 || group == disp_prefetch)
34889 window_list->num_loads += 1;
34890 else if (group == disp_load_store)
34891 {
34892 window_list->num_stores += 1;
34893 window_list->num_loads += 1;
34894 }
34895 }
34896
34897 /* Adds a scheduled instruction, INSN, to the current dispatch window.
34898 If the total bytes of instructions or the number of instructions in
34899 the window exceed the allowable limits, it allocates a new window. */
34900
34901 static void
34902 add_to_dispatch_window (rtx insn)
34903 {
34904 int byte_len;
34905 dispatch_windows *window_list;
34906 dispatch_windows *next_list;
34907 dispatch_windows *window0_list;
34908 enum insn_path path;
34909 enum dispatch_group insn_group;
34910 bool insn_fits;
34911 int num_insn;
34912 int num_uops;
34913 int window_num;
34914 int insn_num_uops;
34915 int sum;
34916
34917 if (INSN_CODE (insn) < 0)
34918 return;
34919
34920 byte_len = min_insn_size (insn);
34921 window_list = dispatch_window_list;
34922 next_list = window_list->next;
34923 path = get_insn_path (insn);
34924 insn_group = get_insn_group (insn);
34925
34926 /* Get the last dispatch window. */
34927 if (next_list)
34928 window_list = dispatch_window_list->next;
34929
34930 if (path == path_single)
34931 insn_num_uops = 1;
34932 else if (path == path_double)
34933 insn_num_uops = 2;
34934 else
34935 insn_num_uops = (int) path;
34936
34937 /* If current window is full, get a new window.
34938 Window number zero is full if MAX_INSN uops are scheduled in it.
34939 Window number one is full if window zero's bytes plus window
34940 one's bytes equal 32, or if adding the bytes of the new instruction
34941 makes the total 48 or more, or if it already has MAX_INSN
34942 instructions in it. */
34943 num_insn = window_list->num_insn;
34944 num_uops = window_list->num_uops;
34945 window_num = window_list->window_num;
34946 insn_fits = fits_dispatch_window (insn);
34947
34948 if (num_insn >= MAX_INSN
34949 || num_uops + insn_num_uops > MAX_INSN
34950 || !(insn_fits))
34951 {
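/* Flip between window 0 and window 1.  */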
34952 window_num = ~window_num & 1;
34953 window_list = allocate_next_window (window_num);
34954 }
34955
34956 if (window_num == 0)
34957 {
34958 add_insn_window (insn, window_list, insn_num_uops);
34959 if (window_list->num_insn >= MAX_INSN
34960 && insn_group == disp_branch)
34961 {
34962 process_end_window ();
34963 return;
34964 }
34965 }
34966 else if (window_num == 1)
34967 {
34968 window0_list = window_list->prev;
34969 sum = window0_list->window_size + window_list->window_size;
34970 if (sum == 32
34971 || (byte_len + sum) >= 48)
34972 {
34973 process_end_window ();
34974 window_list = dispatch_window_list;
34975 }
34976
34977 add_insn_window (insn, window_list, insn_num_uops);
34978 }
34979 else
34980 gcc_unreachable ();
34981
34982 if (is_end_basic_block (insn_group))
34983 {
34984 /* End of basic block is reached; do end-basic-block processing. */
34985 process_end_window ();
34986 return;
34987 }
34988 }
34989
34990 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34991
34992 DEBUG_FUNCTION static void
34993 debug_dispatch_window_file (FILE *file, int window_num)
34994 {
34995 dispatch_windows *list;
34996 int i;
34997
34998 if (window_num == 0)
34999 list = dispatch_window_list;
35000 else
35001 list = dispatch_window_list1;
35002
35003 fprintf (file, "Window #%d:\n", list->window_num);
35004 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
35005 list->num_insn, list->num_uops, list->window_size);
35006 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
35007 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
35008
35009 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
35010 list->num_stores);
35011 fprintf (file, " insn info:\n");
35012
35013 for (i = 0; i < MAX_INSN; i++)
35014 {
35015 if (!list->window[i].insn)
35016 break;
35017 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
35018 i, group_name[list->window[i].group],
35019 i, (void *)list->window[i].insn,
35020 i, list->window[i].path,
35021 i, list->window[i].byte_len,
35022 i, list->window[i].imm_bytes);
35023 }
35024 }
35025
35026 /* Print to stdout a dispatch window. */
35027
35028 DEBUG_FUNCTION void
35029 debug_dispatch_window (int window_num)
35030 {
35031 debug_dispatch_window_file (stdout, window_num);
35032 }
35033
35034 /* Print INSN dispatch information to FILE. */
35035
35036 DEBUG_FUNCTION static void
35037 debug_insn_dispatch_info_file (FILE *file, rtx insn)
35038 {
35039 int byte_len;
35040 enum insn_path path;
35041 enum dispatch_group group;
35042 int imm_size;
35043 int num_imm_operand;
35044 int num_imm32_operand;
35045 int num_imm64_operand;
35046
35047 if (INSN_CODE (insn) < 0)
35048 return;
35049
35050 byte_len = min_insn_size (insn);
35051 path = get_insn_path (insn);
35052 group = get_insn_group (insn);
35053 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
35054 &num_imm64_operand);
35055
35056 fprintf (file, " insn info:\n");
35057 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
35058 group_name[group], path, byte_len);
35059 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
35060 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
35061 }
35062
35063 /* Print to stdout the status of the ready list with respect to
35064 dispatch windows. */
35065
35066 DEBUG_FUNCTION void
35067 debug_ready_dispatch (void)
35068 {
35069 int i;
35070 int no_ready = number_in_ready ();
35071
35072 fprintf (stdout, "Number of ready: %d\n", no_ready);
35073
35074 for (i = 0; i < no_ready; i++)
35075 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
35076 }
35077
35078 /* This routine is the driver of the dispatch scheduler. */
35079
35080 static void
35081 do_dispatch (rtx insn, int mode)
35082 {
35083 if (mode == DISPATCH_INIT)
35084 init_dispatch_sched ();
35085 else if (mode == ADD_TO_DISPATCH_WINDOW)
35086 add_to_dispatch_window (insn);
35087 }
35088
35089 /* Return TRUE if Dispatch Scheduling is supported. */
35090
35091 static bool
35092 has_dispatch (rtx insn, int action)
35093 {
35094 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
35095 switch (action)
35096 {
35097 default:
35098 return false;
35099
35100 case IS_DISPATCH_ON:
35101 return true;
35103
35104 case IS_CMP:
35105 return is_cmp (insn);
35106
35107 case DISPATCH_VIOLATION:
35108 return dispatch_violation ();
35109
35110 case FITS_DISPATCH_WINDOW:
35111 return fits_dispatch_window (insn);
35112 }
35113
35114 return false;
35115 }
35116
35117 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
35118 place emms and femms instructions. */
35119
35120 static enum machine_mode
35121 ix86_preferred_simd_mode (enum machine_mode mode)
35122 {
35123 /* Disable double precision vectorizer if needed. */
35124 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
35125 return word_mode;
35126
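/* Returning word_mode tells the vectorizer that no vector mode is
   preferred, i.e. not to vectorize.  */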
35127 if (!TARGET_AVX && !TARGET_SSE)
35128 return word_mode;
35129
35130 switch (mode)
35131 {
35132 case SFmode:
35133 return (TARGET_AVX && !flag_prefer_avx128) ? V8SFmode : V4SFmode;
35134 case DFmode:
35135 return (TARGET_AVX && !flag_prefer_avx128) ? V4DFmode : V2DFmode;
35136 case DImode:
35137 return V2DImode;
35138 case SImode:
35139 return V4SImode;
35140 case HImode:
35141 return V8HImode;
35142 case QImode:
35143 return V16QImode;
35144
35145 default:;
35146 }
35147
35148 return word_mode;
35149 }
35150
35151 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
35152 vectors. */
35153
35154 static unsigned int
35155 ix86_autovectorize_vector_sizes (void)
35156 {
35157 return TARGET_AVX ? 32 | 16 : 0;
35158 }
35159
35160 /* Initialize the GCC target structure. */
35161 #undef TARGET_RETURN_IN_MEMORY
35162 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
35163
35164 #undef TARGET_LEGITIMIZE_ADDRESS
35165 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
35166
35167 #undef TARGET_ATTRIBUTE_TABLE
35168 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
35169 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35170 # undef TARGET_MERGE_DECL_ATTRIBUTES
35171 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
35172 #endif
35173
35174 #undef TARGET_COMP_TYPE_ATTRIBUTES
35175 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
35176
35177 #undef TARGET_INIT_BUILTINS
35178 #define TARGET_INIT_BUILTINS ix86_init_builtins
35179 #undef TARGET_BUILTIN_DECL
35180 #define TARGET_BUILTIN_DECL ix86_builtin_decl
35181 #undef TARGET_EXPAND_BUILTIN
35182 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
35183
35184 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
35185 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
35186 ix86_builtin_vectorized_function
35187
35188 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
35189 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
35190
35191 #undef TARGET_BUILTIN_RECIPROCAL
35192 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
35193
35194 #undef TARGET_ASM_FUNCTION_EPILOGUE
35195 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
35196
35197 #undef TARGET_ENCODE_SECTION_INFO
35198 #ifndef SUBTARGET_ENCODE_SECTION_INFO
35199 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
35200 #else
35201 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
35202 #endif
35203
35204 #undef TARGET_ASM_OPEN_PAREN
35205 #define TARGET_ASM_OPEN_PAREN ""
35206 #undef TARGET_ASM_CLOSE_PAREN
35207 #define TARGET_ASM_CLOSE_PAREN ""
35208
35209 #undef TARGET_ASM_BYTE_OP
35210 #define TARGET_ASM_BYTE_OP ASM_BYTE
35211
35212 #undef TARGET_ASM_ALIGNED_HI_OP
35213 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
35214 #undef TARGET_ASM_ALIGNED_SI_OP
35215 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
35216 #ifdef ASM_QUAD
35217 #undef TARGET_ASM_ALIGNED_DI_OP
35218 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
35219 #endif
35220
35221 #undef TARGET_PROFILE_BEFORE_PROLOGUE
35222 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
35223
35224 #undef TARGET_ASM_UNALIGNED_HI_OP
35225 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
35226 #undef TARGET_ASM_UNALIGNED_SI_OP
35227 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
35228 #undef TARGET_ASM_UNALIGNED_DI_OP
35229 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
35230
35231 #undef TARGET_PRINT_OPERAND
35232 #define TARGET_PRINT_OPERAND ix86_print_operand
35233 #undef TARGET_PRINT_OPERAND_ADDRESS
35234 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
35235 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
35236 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
35237 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
35238 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
35239
35240 #undef TARGET_SCHED_INIT_GLOBAL
35241 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
35242 #undef TARGET_SCHED_ADJUST_COST
35243 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
35244 #undef TARGET_SCHED_ISSUE_RATE
35245 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
35246 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
35247 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
35248 ia32_multipass_dfa_lookahead
35249
35250 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
35251 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
35252
35253 #ifdef HAVE_AS_TLS
35254 #undef TARGET_HAVE_TLS
35255 #define TARGET_HAVE_TLS true
35256 #endif
35257 #undef TARGET_CANNOT_FORCE_CONST_MEM
35258 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
35259 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
35260 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
35261
35262 #undef TARGET_DELEGITIMIZE_ADDRESS
35263 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
35264
35265 #undef TARGET_MS_BITFIELD_LAYOUT_P
35266 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
35267
35268 #if TARGET_MACHO
35269 #undef TARGET_BINDS_LOCAL_P
35270 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
35271 #endif
35272 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35273 #undef TARGET_BINDS_LOCAL_P
35274 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
35275 #endif
35276
35277 #undef TARGET_ASM_OUTPUT_MI_THUNK
35278 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
35279 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
35280 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
35281
35282 #undef TARGET_ASM_FILE_START
35283 #define TARGET_ASM_FILE_START x86_file_start
35284
35285 #undef TARGET_DEFAULT_TARGET_FLAGS
35286 #define TARGET_DEFAULT_TARGET_FLAGS \
35287 (TARGET_DEFAULT \
35288 | TARGET_SUBTARGET_DEFAULT \
35289 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
35290
35291 #undef TARGET_HANDLE_OPTION
35292 #define TARGET_HANDLE_OPTION ix86_handle_option
35293
35294 #undef TARGET_OPTION_OVERRIDE
35295 #define TARGET_OPTION_OVERRIDE ix86_option_override
35296 #undef TARGET_OPTION_OPTIMIZATION_TABLE
35297 #define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
35298 #undef TARGET_OPTION_INIT_STRUCT
35299 #define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
35300
35301 #undef TARGET_REGISTER_MOVE_COST
35302 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
35303 #undef TARGET_MEMORY_MOVE_COST
35304 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
35305 #undef TARGET_RTX_COSTS
35306 #define TARGET_RTX_COSTS ix86_rtx_costs
35307 #undef TARGET_ADDRESS_COST
35308 #define TARGET_ADDRESS_COST ix86_address_cost
35309
35310 #undef TARGET_FIXED_CONDITION_CODE_REGS
35311 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
35312 #undef TARGET_CC_MODES_COMPATIBLE
35313 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
35314
35315 #undef TARGET_MACHINE_DEPENDENT_REORG
35316 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
35317
35318 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
35319 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
35320
35321 #undef TARGET_BUILD_BUILTIN_VA_LIST
35322 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
35323
35324 #undef TARGET_ENUM_VA_LIST_P
35325 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
35326
35327 #undef TARGET_FN_ABI_VA_LIST
35328 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
35329
35330 #undef TARGET_CANONICAL_VA_LIST_TYPE
35331 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
35332
35333 #undef TARGET_EXPAND_BUILTIN_VA_START
35334 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
35335
35336 #undef TARGET_MD_ASM_CLOBBERS
35337 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
35338
35339 #undef TARGET_PROMOTE_PROTOTYPES
35340 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
35341 #undef TARGET_STRUCT_VALUE_RTX
35342 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
35343 #undef TARGET_SETUP_INCOMING_VARARGS
35344 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
35345 #undef TARGET_MUST_PASS_IN_STACK
35346 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
35347 #undef TARGET_FUNCTION_ARG_ADVANCE
35348 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
35349 #undef TARGET_FUNCTION_ARG
35350 #define TARGET_FUNCTION_ARG ix86_function_arg
35351 #undef TARGET_FUNCTION_ARG_BOUNDARY
35352 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
35353 #undef TARGET_PASS_BY_REFERENCE
35354 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
35355 #undef TARGET_INTERNAL_ARG_POINTER
35356 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
35357 #undef TARGET_UPDATE_STACK_BOUNDARY
35358 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
35359 #undef TARGET_GET_DRAP_RTX
35360 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
35361 #undef TARGET_STRICT_ARGUMENT_NAMING
35362 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
35363 #undef TARGET_STATIC_CHAIN
35364 #define TARGET_STATIC_CHAIN ix86_static_chain
35365 #undef TARGET_TRAMPOLINE_INIT
35366 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
35367 #undef TARGET_RETURN_POPS_ARGS
35368 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
35369
35370 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
35371 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
35372
35373 #undef TARGET_SCALAR_MODE_SUPPORTED_P
35374 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
35375
35376 #undef TARGET_VECTOR_MODE_SUPPORTED_P
35377 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
35378
35379 #undef TARGET_C_MODE_FOR_SUFFIX
35380 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
35381
35382 #ifdef HAVE_AS_TLS
35383 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
35384 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
35385 #endif
35386
35387 #ifdef SUBTARGET_INSERT_ATTRIBUTES
35388 #undef TARGET_INSERT_ATTRIBUTES
35389 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
35390 #endif
35391
35392 #undef TARGET_MANGLE_TYPE
35393 #define TARGET_MANGLE_TYPE ix86_mangle_type
35394
35395 #undef TARGET_STACK_PROTECT_FAIL
35396 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
35397
35398 #undef TARGET_SUPPORTS_SPLIT_STACK
35399 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
35400
35401 #undef TARGET_FUNCTION_VALUE
35402 #define TARGET_FUNCTION_VALUE ix86_function_value
35403
35404 #undef TARGET_FUNCTION_VALUE_REGNO_P
35405 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
35406
35407 #undef TARGET_SECONDARY_RELOAD
35408 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
35409
35410 #undef TARGET_PREFERRED_RELOAD_CLASS
35411 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
35412 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
35413 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
35414 #undef TARGET_CLASS_LIKELY_SPILLED_P
35415 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
35416
35417 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
35418 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
35419 ix86_builtin_vectorization_cost
35420 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
35421 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
35422 ix86_vectorize_builtin_vec_perm
35423 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
35424 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
35425 ix86_vectorize_builtin_vec_perm_ok
35426 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
35427 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
35428 ix86_preferred_simd_mode
35429 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
35430 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
35431 ix86_autovectorize_vector_sizes
35432
35433 #undef TARGET_SET_CURRENT_FUNCTION
35434 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
35435
35436 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
35437 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
35438
35439 #undef TARGET_OPTION_SAVE
35440 #define TARGET_OPTION_SAVE ix86_function_specific_save
35441
35442 #undef TARGET_OPTION_RESTORE
35443 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
35444
35445 #undef TARGET_OPTION_PRINT
35446 #define TARGET_OPTION_PRINT ix86_function_specific_print
35447
35448 #undef TARGET_CAN_INLINE_P
35449 #define TARGET_CAN_INLINE_P ix86_can_inline_p
35450
35451 #undef TARGET_EXPAND_TO_RTL_HOOK
35452 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
35453
35454 #undef TARGET_LEGITIMATE_ADDRESS_P
35455 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
35456
35457 #undef TARGET_FRAME_POINTER_REQUIRED
35458 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
35459
35460 #undef TARGET_CAN_ELIMINATE
35461 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
35462
35463 #undef TARGET_EXTRA_LIVE_ON_ENTRY
35464 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
35465
35466 #undef TARGET_ASM_CODE_END
35467 #define TARGET_ASM_CODE_END ix86_code_end
35468
35469 #undef TARGET_CONDITIONAL_REGISTER_USAGE
35470 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
35471
35472 #if TARGET_MACHO
35473 #undef TARGET_INIT_LIBFUNCS
35474 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
35475 #endif
35476
35477 struct gcc_target targetm = TARGET_INITIALIZER;
35478 \f
35479 #include "gt-i386.h"